fmda/moisture_rnn.py

   1 # v2 training and prediction class infrastructure
   2
   3 # Environment
   4 import random
   5 import numpy as np
   6 import pandas as pd
   7 import tensorflow as tf
   8 import matplotlib.pyplot as plt
   9 import sys
  10 from tensorflow.keras.callbacks import Callback, EarlyStopping, TerminateOnNaN
  11 # from sklearn.metrics import mean_squared_error
  12 import logging
  13 from tensorflow.keras.layers import LSTM, SimpleRNN, Input, Dropout, Dense
  14 # Local modules
  15 import reproducibility
  16 # from utils import print_dict_summary
  17 from abc import ABC, abstractmethod
  18 from utils import hash2, all_items_exist, hash_ndarray, hash_weights
  19 from data_funcs import rmse, plot_data, compare_dicts
  20 import copy
  21 # import yaml
  22 from sklearn.preprocessing import MinMaxScaler, StandardScaler
  23 import warnings
  24
  25 #*************************************************************************************
  26 # Data Formatting Functions
  27
  28 def staircase(x,y,timesteps,datapoints,return_sequences=False, verbose = False):
  29     # x [datapoints,features]    all inputs
  30     # y [datapoints,outputs]
  31     # timesteps: split x and y into samples length timesteps, shifted by 1
  32     # datapoints: number of timesteps to use for training, no more than y.shape[0]
  33     if verbose:
  34         print('staircase: shape x = ',x.shape)
  35         print('staircase: shape y = ',y.shape)
  36         print('staircase: timesteps=',timesteps)
  37         print('staircase: datapoints=',datapoints)
  38         print('staircase: return_sequences=',return_sequences)
  39     outputs = y.shape[1]
  40     features = x.shape[1]
  41     samples = datapoints-timesteps+1
  42     if verbose:
  43         print('staircase: samples=',samples,'timesteps=',timesteps,'features=',features)
  44     x_train = np.empty([samples, timesteps, features])
  45     if return_sequences:
  46         if verbose:
  47             print('returning all timesteps in a sample')
  48         y_train = np.empty([samples, timesteps, outputs])  # all
  49         for i in range(samples):
  50             for k in range(timesteps):
  51                 x_train[i,k,:] = x[i+k,:]
  52                 y_train[i,k,:] = y[i+k,:]
  53     else:
  54         if verbose:
  55             print('returning only the last timestep in a sample')
  56         y_train = np.empty([samples, outputs])
  57         for i in range(samples):
  58             for k in range(timesteps):
  59                 x_train[i,k,:] = x[i+k,:]
  60             y_train[i,:] = y[i+timesteps-1,:]
  61
  62     return x_train, y_train
  63
  64 def staircase_2(x,y,timesteps,batch_size=None,trainsteps=np.inf,return_sequences=False, verbose = False):
  65     # create RNN training data in multiple batches
  66     # input:
  67     #     x (,features)
  68     #     y (,outputs)
  69     #     timesteps: split x and y into sequences length timesteps
  70     #                a.k.a. lookback or sequence_length
  71
  72     # print params if verbose
  73
  74     if batch_size is None:
  75         raise ValueError('staircase_2 requires batch_size')
  76     if verbose:
  77         print('staircase_2: shape x = ',x.shape)
  78         print('staircase_2: shape y = ',y.shape)
  79         print('staircase_2: timesteps=',timesteps)
  80         print('staircase_2: batch_size=',batch_size)
  81         print('staircase_2: return_sequences=',return_sequences)
  82
  83     nx,features= x.shape
  84     ny,outputs = y.shape
  85     datapoints = min(nx,ny,trainsteps)
  86     if verbose:
  87         print('staircase_2: datapoints=',datapoints)
  88
  89     # sequence j in a given batch is assumed to be the continuation of sequence j in the previous batch
  90     # https://www.tensorflow.org/guide/keras/working_with_rnns Cross-batch statefulness
  91
  92     # example with timesteps=3 batch_size=3 datapoints=15
  93     #     batch 0: [0 1 2]      [1 2 3]      [2 3 4]
  94     #     batch 1: [3 4 5]      [4 5 6]      [5 6 7]
  95     #     batch 2: [6 7 8]      [7 8 9]      [8 9 10]
  96     #     batch 3: [9 10 11]    [10 11 12]   [11 12 13]
  97     #     batch 4: [12 13 14]   [13 14 15]    when runs out this is the last batch, can be shorter
  98     #
  99     # TODO: implement for multiple locations, same starting time for each batch
 100     #              Loc 1         Loc 2       Loc 3
 101     #     batch 0: [0 1 2]      [0 1 2]      [0 1 2]
 102     #     batch 1: [3 4 5]      [3 4 5]      [3 4 5]
 103     #     batch 2: [6 7 8]      [6 7 8]      [6 7 8]
 104     # TODO: second epoch shift starting time at batch 0 in time
 105
 106     # TODO: implement for multiple locations, different starting times for each batch
 107     #              Loc 1       Loc 2       Loc 3
 108     #     batch 0: [0 1 2]   [1 2 3]      [2 3 4]
 109     #     batch 1: [3 4 5]   [4 5 6]      [5 6 57
 110     #     batch 2: [6 7 8]   [7 8 9]      [8 9 10]
 111
 112     #
 113     #     the first sample in batch j starts from timesteps*j and ends with timesteps*(j+1)-1
 114     #     e.g. the final hidden state of the rnn after the sequence of steps [0 1 2] in batch 0
 115     #     becomes the starting hidden state of the rnn in the sequence of steps [3 4 5] in batch 1, etc.
 116     #
 117     #     sample [0 1 2] means the rnn is used twice to map state 0 -> 1 -> 2
 118     #     the state at time 0 is fixed but the state is considered a variable at times 1 and 2
 119     #     the loss is computed from the output at time 2 and the gradient of the loss function by chain rule which ends at time 0 because the state there is a constant -> derivative is zero
 120     #     sample [3 4 5] means the rnn is used twice to map state 3 -> 4 -> 5    #     the state at time 3 is fixed to the output of the first sequence [0 1 2]
 121     #     the loss is computed from the output at time 5 and the gradient of the loss function by chain rule which ends at time 3 because the state there is considered constant -> derivative is zero
 122     #     how is the gradient computed? I suppose keras adds gradient wrt the weights at 2 5 8 ... 3 6 9... 4 7 ... and uses that to update the weights
 123     #     there is only one set of weights   h(2) = f(h(1),w)  h(1) = f(h(0),w)   but w is always the same
 124     #     each column is a one successive evaluation of h(n+1) = f(h(n),w)  for n = n_startn n_start+1,...
 125     #     the cannot be evaluated efficiently on gpu because gpu is a parallel processor
 126     #     this of it as each column served by one thread, and the threads are independent because they execute in parallel, there needs to be large number of threads (32 is a good number)\
 127     #     each batch consists of independent calculations
 128     #     but it can depend on the result of the previous batch (that's the recurrent parr)
 129
 130
 131
 132     max_batches = datapoints // timesteps
 133     max_sequences = max_batches * batch_size
 134
 135     if verbose:
 136         print('staircase_2: max_batches=',max_batches)
 137         print('staircase_2: max_sequences=',max_sequences)
 138
 139     x_train = np.zeros((max_sequences, timesteps, features))
 140     if return_sequences:
 141         y_train = np.empty((max_sequences, timesteps, outputs))
 142     else:
 143         y_train = np.empty((max_sequences, outputs ))
 144
 145     # build the sequences
 146     k=0
 147     for i in range(max_batches):
 148         for j in range(batch_size):
 149             begin = i*timesteps + j
 150             next  = begin + timesteps
 151             if next > datapoints:
 152                 break
 153             if verbose:
 154                 print('sequence',k,'batch',i,'sample',j,'data',begin,'to',next-1)
 155             x_train[k,:,:] = x[begin:next,:]
 156             if return_sequences:
 157                  y_train[k,:,:] = y[begin:next,:]
 158             else:
 159                  y_train[k,:] = y[next-1,:]
 160             k += 1
 161     if verbose:
 162         print('staircase_2: shape x_train = ',x_train.shape)
 163         print('staircase_2: shape y_train = ',y_train.shape)
 164         print('staircase_2: sequences generated',k)
 165         print('staircase_2: batch_size=',batch_size)
 166     k = (k // batch_size) * batch_size
 167     if verbose:
 168         print('staircase_2: removing partial and empty batches at the end, keeping',k)
 169     x_train = x_train[:k,:,:]
 170     if return_sequences:
 171          y_train = y_train[:k,:,:]
 172     else:
 173          y_train = y_train[:k,:]
 174
 175     if verbose:
 176         print('staircase_2: shape x_train = ',x_train.shape)
 177         print('staircase_2: shape y_train = ',y_train.shape)
 178
 179     return x_train, y_train
 180
 181
 182 # Dictionary of scalers, used to avoid multiple object creation and to avoid multiple if statements
 183 scalers = {
 184     'minmax': MinMaxScaler(),
 185     'standard': StandardScaler()
 186 }
 187
 188
 189 def batch_setup(ids, batch_size):
 190     """
 191     Sets up stateful batched training data scheme for RNN training.
 192
 193     This function takes a list or array of identifiers (`ids`) and divides them into batches of a specified size (`batch_size`). If the last batch does not have enough elements to meet the `batch_size`, the function will loop back to the start of the identifiers and continue filling the batch until it reaches the required size.
 194
 195     Parameters:
 196     -----------
 197     ids : list or numpy array
 198         A list or numpy array containing the ids to be batched.
 199
 200     batch_size : int
 201         The desired size of each batch.
 202
 203     Returns:
 204     --------
 205     batches : list of lists
 206         A list where each element is a batch (itself a list) of identifiers. Each batch will contain exactly `batch_size` elements.
 207
 208     Example:
 209     --------
 210     >>> ids = [1, 2, 3, 4, 5]
 211     >>> batch_size = 3
 212     >>> batch_setup(ids, batch_size)
 213     [[1, 2, 3], [4, 5, 1]]
 214
 215     Notes:
 216     ------
 217     - If `ids` is shorter than `batch_size`, the returned list will contain a single batch where identifiers are repeated from the start of `ids` until the batch is filled.
 218     """
 219     # Ensure ids is a numpy array
 220     x = np.array(ids)
 221
 222     # Initialize the list to hold the batches
 223     batches = []
 224
 225     # Use a loop to slice the list/array into batches
 226     for i in range(0, len(x), batch_size):
 227         batch = list(x[i:i + batch_size])
 228
 229         # If the batch is not full, continue from the start
 230         while len(batch) < batch_size:
 231             # Calculate the remaining number of items needed
 232             remaining = batch_size - len(batch)
 233             # Append the needed number of items from the start of the array
 234             batch.extend(x[:remaining])
 235
 236         batches.append(batch)
 237
 238     return batches
 239
 240 def staircase_spatial(X, y, batch_size, timesteps, hours=None, start_times = None, verbose = True):
 241     """
 242     Prepares spatially formatted time series data for RNN training by creating batches of sequences across different locations, stacked to be compatible with stateful models.
 243
 244     This function processes multi-location time series data by slicing it into batches and formatting it to fit into a recurrent neural network (RNN) model. It utilizes a staircase-like approach to prepare sequences for each location and then interlaces them to align with stateful RNN structures.
 245
 246     Parameters:
 247     -----------
 248     X : list of numpy arrays
 249         A list where each element is a numpy array containing features for a specific location. The shape of each array is `(total_time_steps, features)`.
 250
 251     y : list of numpy arrays
 252         A list where each element is a numpy array containing the target values for a specific location. The shape of each array is `(total_time_steps,)`.
 253
 254     batch_size : int
 255         The number of sequences to include in each batch.
 256
 257     timesteps : int
 258         The number of time steps to include in each sequence for the RNN.
 259
 260     hours : int, optional
 261         The length of each time series to consider for each location. If `None`, it defaults to the minimum length of `y` across all locations.
 262
 263     start_times : numpy array, optional
 264         The initial time step for each location. If `None`, it defaults to an array starting from 0 and incrementing by 1 for each location.
 265
 266     verbose : bool, optional
 267         If `True`, prints additional information during processing. Default is `True`.
 268
 269     Returns:
 270     --------
 271     XX : numpy array
 272         A 3D numpy array with shape `(total_sequences, timesteps, features)` containing the prepared feature sequences for all locations.
 273
 274     yy : numpy array
 275         A 2D numpy array with shape `(total_sequences, 1)` containing the corresponding target values for all locations.
 276
 277     n_seqs : int
 278         Number of sequences per location. Used to reset states when location changes. Hidden state of RNN will be reset after n_seqs number of batches
 279
 280     Notes:
 281     ------
 282     - The function handles spatially distributed time series data by batching and formatting it for stateful RNNs.
 283     - `hours` determines how much of the time series is used for each location. If not provided, it defaults to the shortest series in `y`.
 284     - If `start_times` is not provided, it assumes each location starts its series at progressively later time steps.
 285     - The `batch_setup` function is used internally to manage the creation of location and time step batches.
 286     - The returned feature sequences `XX` and target sequences `yy` are interlaced to align with the expected input format of stateful RNNs.
 287     """
 288
 289     # Generate ids based on number of distinct timeseries provided
 290     n_loc = len(y) # assuming each list entry for y is a separate location
 291     loc_ids = np.arange(n_loc)
 292
 293     # Generate hours and start_times if None
 294     if hours is None:
 295         print("Setting total hours to minimum length of y in provided dictionary")
 296         hours = min(len(yi) for yi in y)
 297     if start_times is None:
 298         print("Setting Start times to offset by 1 hour by location")
 299         start_times = np.arange(n_loc)
 300     # Set up batches
 301     loc_batch, t_batch =  batch_setup(loc_ids, batch_size), batch_setup(start_times, batch_size)
 302     if verbose:
 303         print(f"Location ID Batches: {loc_batch}")
 304         print(f"Start Times for Batches: {t_batch}")
 305
 306     # Loop over batches and construct with staircase_2
 307     Xs = []
 308     ys = []
 309     for i in range(0, len(loc_batch)):
 310         locs_i = loc_batch[i]
 311         ts = t_batch[i]
 312         for j in range(0, len(locs_i)):
 313             t0 = ts[j]
 314             tend = t0 + hours
 315             # Create RNNData Dict
 316             # Subset data to given location and time from t0 to t0+hours
 317             k = locs_i[j] # Used to account for fewer locations than batch size
 318             X_temp = X[k][t0:tend,:]
 319             y_temp = y[k][t0:tend].reshape(-1,1)
 320
 321             # Format sequences
 322             Xi, yi = staircase_2(
 323                 X_temp,
 324                 y_temp,
 325                 timesteps = timesteps,
 326                 batch_size = 1,  # note: using 1 here to format sequences for a single location, not same as target batch size for training data
 327                 verbose=False)
 328
 329             Xs.append(Xi)
 330             ys.append(yi)
 331
 332     # Drop incomplete batches
 333     lens = [yi.shape[0] for yi in ys]
 334     n_seqs = min(lens)
 335     if verbose:
 336         print(f"Minimum number of sequences by location: {n_seqs}")
 337         print(f"Applying minimum length to other arrays.")
 338     Xs = [Xi[:n_seqs] for Xi in Xs]
 339     ys = [yi[:n_seqs] for yi in ys]
 340
 341     # Interlace arrays to match stateful structure
 342     n_features = Xi.shape[2]
 343     XXs = []
 344     yys = []
 345     for i in range(0, len(loc_batch)):
 346         locs_i = loc_batch[i]
 347         XXi = np.empty((Xs[0].shape[0]*batch_size, 5, n_features))
 348         yyi = np.empty((Xs[0].shape[0]*batch_size, 1))
 349         for j in range(0, len(locs_i)):
 350             XXi[j::(batch_size)] =  Xs[locs_i[j]]
 351             yyi[j::(batch_size)] =  ys[locs_i[j]]
 352         XXs.append(XXi)
 353         yys.append(yyi)
 354     yy = np.concatenate(yys, axis=0)
 355     XX = np.concatenate(XXs, axis=0)
 356
 357     if verbose:
 358         print(f"Spatially Formatted X Shape: {XX.shape}")
 359         print(f"Spatially Formatted X Shape: {yy.shape}")
 360
 361
 362     return XX, yy, n_seqs
 363
 364 #***********************************************************************************************
 365 ### RNN Class Functionality
 366
 367 class RNNParams(dict):
 368     """
 369     A custom dictionary class for handling RNN parameters. Automatically calculates certain params based on others. Overwrites the update method to protect from incompatible parameter choices. Inherits from dict
 370     """
 371     def __init__(self, input_dict):
 372         """
 373         Initializes the RNNParams instance and runs checks and shape calculations.
 374
 375         Parameters:
 376         -----------
 377         input_dict : dict,
 378             A dictionary containing RNN parameters.
 379         """
 380         super().__init__(input_dict)
 381         # Automatically run checks on initialization
 382         self.run_checks()
 383         # Automatically calculate shapes on initialization
 384         self.calc_param_shapes()
 385     def run_checks(self, verbose=True):
 386         """
 387         Validates that required keys exist and are of the correct type.
 388
 389         Parameters:
 390         -----------
 391         verbose : bool, optional
 392             If True, prints status messages. Default is True.
 393         """
 394         print("Checking params...")
 395         # Keys must exist and be integers
 396         int_keys = [
 397             'batch_size', 'timesteps', 'rnn_layers',
 398             'rnn_units', 'dense_layers', 'dense_units', 'epochs'
 399         ]
 400
 401         for key in int_keys:
 402             assert key in self, f"Missing required key: {key}"
 403             assert isinstance(self[key], int), f"Key '{key}' must be an integer"
 404
 405         # Keys must exist and be lists
 406         list_keys = ['activation', 'features_list', 'dropout']
 407         for key in list_keys:
 408             assert key in self, f"Missing required key: {key}"
 409             assert isinstance(self[key], list), f"Key '{key}' must be a list"
 410
 411         # Keys must exist and be floats
 412         float_keys = ['learning_rate', 'train_frac', 'val_frac']
 413         for key in float_keys:
 414             assert key in self, f"Missing required key: {key}"
 415             assert isinstance(self[key], float), f"Key '{key}' must be a float"
 416
 417         print("Input dictionary passed all checks.")
 418     def calc_param_shapes(self, verbose=True):
 419         """
 420         Calculates and updates the shapes of certain parameters based on input data.
 421
 422         Parameters:
 423         -----------
 424         verbose : bool, optional
 425             If True, prints status messages. Default is True.
 426         """
 427         if verbose:
 428             print("Calculating shape params based on features list, timesteps, and batch size")
 429             print(f"Input Feature List: {self['features_list']}")
 430             print(f"Input Timesteps: {self['timesteps']}")
 431             print(f"Input Batch Size: {self['batch_size']}")
 432
 433         n_features = len(self['features_list'])
 434         batch_shape = (self["batch_size"], self["timesteps"], n_features)
 435         if verbose:
 436             print("Calculated params:")
 437             print(f"Number of features: {n_features}")
 438             print(f"Batch Shape: {batch_shape}")
 439
 440         # Update the dictionary
 441         super().update({
 442             'n_features': n_features,
 443             'batch_shape': batch_shape
 444         })
 445         if verbose:
 446             print(self)
 447
 448     def update(self, *args, verbose=True, **kwargs):
 449         """
 450         Overwrites the standard update functon from dict. This is to prevent certain keys from being modified directly and to automatically update keys to be compatible with each other. The keys handled relate to the shape of the input data to the RNN.
 451
 452         Parameters:
 453         -----------
 454         verbose : bool, optional
 455             If True, prints status messages. Default is True.
 456         """
 457         # Prevent updating n_features and batch_shape
 458         restricted_keys = {'n_features', 'batch_shape'}
 459         keys_to_check = {'features_list', 'timesteps', 'batch_size'}
 460
 461         # Check for restricted keys in args
 462         if args:
 463             if isinstance(args[0], dict):
 464                 if restricted_keys & args[0].keys():
 465                     raise KeyError(f"Cannot directly update keys: {restricted_keys & args[0].keys()}, \n Instead update one of: {keys_to_check}")
 466             elif isinstance(args[0], (tuple, list)) and all(isinstance(i, tuple) and len(i) == 2 for i in args[0]):
 467                 if restricted_keys & {k for k, v in args[0]}:
 468                     raise KeyError(f"Cannot directly update keys: {restricted_keys & {k for k, v in args[0]}}, \n Instead update one of: {keys_to_check}")
 469
 470         # Check for restricted keys in kwargs
 471         if restricted_keys & kwargs.keys():
 472             raise KeyError(f"Cannot update restricted keys: {restricted_keys & kwargs.keys()}")
 473
 474
 475         # Track if specific keys are updated
 476         keys_updated = set()
 477
 478         # Update using the standard dict update method
 479         if args:
 480             if isinstance(args[0], dict):
 481                 keys_updated.update(args[0].keys() & keys_to_check)
 482             elif isinstance(args[0], (tuple, list)) and all(isinstance(i, tuple) and len(i) == 2 for i in args[0]):
 483                 keys_updated.update(k for k, v in args[0] if k in keys_to_check)
 484
 485         if kwargs:
 486             keys_updated.update(kwargs.keys() & keys_to_check)
 487
 488         # Call the parent update method
 489         super().update(*args, **kwargs)
 490
 491         # Recalculate shapes if necessary
 492         if keys_updated:
 493             self.calc_param_shapes(verbose=verbose)
 494
 495
 496 ## Class for handling input data
 497 class RNNData(dict):
 498     """
 499     A custom dictionary class for managing RNN data, with validation, scaling, and train-test splitting functionality.
 500     """
 501     required_keys = {"loc", "time", "X", "y", "features_list"}
 502     def __init__(self, input_dict, scaler=None, features_list=None):
 503         """
 504         Initializes the RNNData instance, performs checks, and prepares data.
 505
 506         Parameters:
 507         -----------
 508         input_dict : dict
 509             A dictionary containing the initial data.
 510         scaler : str, optional
 511             The name of the scaler to be used (e.g., 'minmax', 'standard'). Default is None.
 512         features_list : list, optional
 513             A subset of features to be used. Default is None which means all features.
 514         """
 515
 516         # Copy to avoid changing external input
 517         input_data = input_dict.copy()
 518         # Initialize inherited dict class
 519         super().__init__(input_data)
 520
 521         # Check if input data is one timeseries dataset or multiple
 522         if type(self.loc['STID']) == str:
 523             self.spatial = False
 524             print("Input data is single timeseries.")
 525         elif type(self.loc['STID']) == list:
 526             self.spatial = True
 527             print("Input data from multiple timeseries.")
 528         else:
 529             raise KeyError(f"Input locations not list or single string")
 530
 531         # Set up Data Scaling
 532         self.scaler = None
 533         if scaler is not None:
 534             self.set_scaler(scaler)
 535
 536         # Rename and define other stuff.
 537         if self.spatial:
 538             self['hours'] = min(arr.shape[0] for arr in self.y)
 539         else:
 540             self['hours'] = len(self['y'])
 541
 542         self['all_features_list'] = self.pop('features_list')
 543         if features_list is None:
 544             print("Using all input features.")
 545             self.features_list = self.all_features_list
 546         else:
 547             self.features_list = features_list
 548         # self.run_checks()
 549         self.__dict__.update(self)
 550
 551     # TODO: Fix checks for multilocation
 552     def run_checks(self, verbose=True):
 553         """
 554         Validates that required keys are present and checks the integrity of data shapes.
 555
 556         Parameters:
 557         -----------
 558         verbose : bool, optional
 559             If True, prints status messages. Default is True.
 560         """
 561         missing_keys = self.required_keys - self.keys()
 562         if missing_keys:
 563             raise KeyError(f"Missing required keys: {missing_keys}")
 564         # # Check y 1-d
 565         # y_shape = np.shape(self.y)
 566         # if not (len(y_shape) == 1 or (len(y_shape) == 2 and y_shape[1] == 1)):
 567         #     raise ValueError(f"'y' must be one-dimensional, with shape (N,) or (N, 1). Current shape is {y_shape}.")
 568
 569         # # Check if 'hours' is provided and matches len(y)
 570         # if 'hours' in self:
 571         #     if self.hours != len(self.y):
 572         #         raise ValueError(f"Provided 'hours' value {self.hours} does not match the length of 'y', which is {len(self.y)}.")
 573         # Check desired subset of features is in all input features
 574         if not all_items_exist(self.features_list, self.all_features_list):
 575             raise ValueError(f"Provided 'features_list' {self.features_list} has elements not in input features.")
 576     def set_scaler(self, scaler):
 577         """
 578         Sets the scaler to be used for data normalization.
 579
 580         Parameters:
 581         -----------
 582         scaler : str
 583             The name of the scaler (e.g., 'minmax', 'standard').
 584         """
 585         recognized_scalers = ['minmax', 'standard']
 586         if scaler in recognized_scalers:
 587             print(f"Setting data scaler: {scaler}")
 588             self.scaler = scalers[scaler]
 589         else:
 590             raise ValueError(f"Unrecognized scaler '{scaler}'. Recognized scalers are: {recognized_scalers}.")
 591     def train_test_split(self, train_frac, val_frac=0.0, subset_features=True, features_list=None, split_space=False, verbose=True):
 592         """
 593         Splits the data into training, validation, and test sets.
 594
 595         Parameters:
 596         -----------
 597         train_frac : float
 598             The fraction of data to be used for training.
 599         val_frac : float, optional
 600             The fraction of data to be used for validation. Default is 0.0.
 601         subset_features : bool, optional
 602             If True, subsets the data to the specified features list. Default is True.
 603         features_list : list, optional
 604             A list of features to use for subsetting. Default is None.
 605         split_space : bool, optional
 606             Whether to split the data based on space. Default is False.
 607         verbose : bool, optional
 608             If True, prints status messages. Default is True.
 609         """
 610         # Indicate whether multi timeseries or not
 611         spatial = self.spatial
 612
 613         # Extract data to desired features, copy to avoid changing input objects
 614         X = self.X.copy()
 615         y = self.y.copy()
 616         if subset_features:
 617             if verbose and self.features_list != self.all_features_list:
 618                 print(f"Subsetting input data to features_list: {self.features_list}")
 619             # Indices to subset all features with based on params features
 620             indices = []
 621             for item in self.features_list:
 622                 if item in self.all_features_list:
 623                     indices.append(self.all_features_list.index(item))
 624                 else:
 625                     print(f"Warning: feature name '{item}' not found in list of all features from input data")
 626             if spatial:
 627                 X = [Xi[:, indices] for Xi in X]
 628             else:
 629                 X = X[:, indices]
 630
 631         # Setup train/test in time
 632         train_ind = int(np.floor(self.hours * train_frac)); self.train_ind = train_ind
 633         test_ind= int(train_ind + round(self.hours * val_frac)); self.test_ind = test_ind
 634
 635         # Check for any potential issues with indices
 636         if test_ind > self.hours:
 637             print(f"Setting test index to {self.hours}")
 638             test_ind = self.hours
 639         if train_ind >= test_ind:
 640             raise ValueError("Train index must be less than test index.")
 641
 642         # Training data from 0 to train_ind
 643         # Validation data from train_ind to test_ind
 644         # Test data from test_ind to end
 645         if spatial:
 646             self.X_train = [Xi[:train_ind] for Xi in X]
 647             self.y_train = [yi[:train_ind].reshape(-1,1) for yi in y]
 648             if val_frac >0:
 649                 self.X_val = [Xi[train_ind:test_ind] for Xi in X]
 650                 self.y_val = [yi[train_ind:test_ind].reshape(-1,1) for yi in y]
 651             self.X_test = [Xi[test_ind:] for Xi in X]
 652             self.y_test = [yi[test_ind:].reshape(-1,1) for yi in y]
 653         else:
 654             self.X_train = X[:train_ind]
 655             self.y_train = y[:train_ind].reshape(-1,1) # assumes y 1-d, change this if vector output
 656             if val_frac >0:
 657                 self.X_val = X[train_ind:test_ind]
 658                 self.y_val = y[train_ind:test_ind].reshape(-1,1) # assumes y 1-d, change this if vector output
 659             self.X_test = X[test_ind:]
 660             self.y_test = y[test_ind:].reshape(-1,1) # assumes y 1-d, change this if vector output
 661
 662
 663
 664         # Print statements if verbose
 665         if verbose:
 666             print(f"Train index: 0 to {train_ind}")
 667             print(f"Validation index: {train_ind} to {test_ind}")
 668             print(f"Test index: {test_ind} to {self.hours}")
 669
 670             if spatial:
 671                 print(f"X_train[0] shape: {self.X_train[0].shape}, y_train[0] shape: {self.y_train[0].shape}")
 672                 print(f"X_val[0] shape: {self.X_val[0].shape}, y_val[0] shape: {self.y_val[0].shape}")
 673                 print(f"X_test[0] shape: {self.X_test[0].shape}, y_test[0] shape: {self.y_test[0].shape}")
 674             else:
 675                 print(f"X_train shape: {self.X_train.shape}, y_train shape: {self.y_train.shape}")
 676                 print(f"X_val shape: {self.X_val.shape}, y_val shape: {self.y_val.shape}")
 677                 print(f"X_test shape: {self.X_test.shape}, y_test shape: {self.y_test.shape}")
 678     # def train_test_split(self, time_fracs=[1.,0.,0.], space_fracs=[1.,0.,0.], subset_features=True, features_list=None, verbose=True):
 679     #     """
 680     #     Splits the data into training, validation, and test sets.
 681
 682     #     Parameters:
 683     #     -----------
 684     #     train_frac : float
 685     #         The fraction of data to be used for training.
 686     #     val_frac : float, optional
 687     #         The fraction of data to be used for validation. Default is 0.0.
 688     #     subset_features : bool, optional
 689     #         If True, subsets the data to the specified features list. Default is True.
 690     #     features_list : list, optional
 691     #         A list of features to use for subsetting. Default is None.
 692     #     split_space : bool, optional
 693     #         Whether to split the data based on space. Default is False.
 694     #     verbose : bool, optional
 695     #         If True, prints status messages. Default is True.
 696     #     """
 697     #     # Indicate whether multi timeseries or not
 698     #     spatial = self.spatial
 699
 700     #     # Set up
 701     #     assert np.sum(time_fracs) == np.sum(space_fracs) == 1., f"Provided cross validation params don't sum to 1"
 702     #     if (len(time_fracs) != 3) or (len(space_fracs) != 3):
 703     #         raise ValueError("Cross-validation params `time_fracs` and `space_fracs` must be lists of length 3, representing (train/validation/test)")
 704
 705     #     train_frac = time_fracs[0]
 706     #     val_frac = time_fracs[1]
 707     #     test_frac = time_fracs[2]
 708
 709     #     # Setup train/val/test in time
 710     #     train_ind = int(np.floor(self.hours * train_frac)); self.train_ind = train_ind
 711     #     test_ind= int(train_ind + round(self.hours * val_frac)); self.test_ind = test_ind
 712     #     # Check for any potential issues with indices
 713     #     if test_ind > self.hours:
 714     #         print(f"Setting test index to {self.hours}")
 715     #         test_ind = self.hours
 716     #     if train_ind > test_ind:
 717     #         raise ValueError("Train index must be less than test index.")
 718
 719     #     # Setup train/val/test in space
 720     #     if spatial:
 721     #         train_frac_sp = space_fracs[0]
 722     #         val_frac_sp = space_fracs[1]
 723     #         locs = np.arange(len(self.loc['STID'])) # indices of locations
 724     #         train_size = int(len(locs) * train_frac_sp)
 725     #         val_size = int(len(locs) * val_frac_sp)
 726     #         random.shuffle(locs)
 727     #         train_locs = locs[:train_size]
 728     #         val_locs = locs[train_size:train_size + val_size]
 729     #         test_locs = locs[train_size + val_size:]
 730     #         # Store Lists of IDs in loc subdirectory
 731     #         self.loc['train_locs'] = [self.loc['STID'][i] for i in train_locs]
 732     #         self.loc['val_locs'] = [self.loc['STID'][i] for i in val_locs]
 733     #         self.loc['test_locs'] = [self.loc['STID'][i] for i in test_locs]
 734
 735
 736     #     # Extract data to desired features, copy to avoid changing input objects
 737     #     X = self.X.copy()
 738     #     y = self.y.copy()
 739     #     if subset_features:
 740     #         if verbose and self.features_list != self.all_features_list:
 741     #             print(f"Subsetting input data to features_list: {self.features_list}")
 742     #         # Indices to subset all features with based on params features
 743     #         indices = []
 744     #         for item in self.features_list:
 745     #             if item in self.all_features_list:
 746     #                 indices.append(self.all_features_list.index(item))
 747     #             else:
 748     #                 print(f"Warning: feature name '{item}' not found in list of all features from input data")
 749     #         if spatial:
 750     #             X = [Xi[:, indices] for Xi in X]
 751     #         else:
 752     #             X = X[:, indices]
 753
 754     #     # Training data from 0 to train_ind
 755     #     # Validation data from train_ind to test_ind
 756     #     # Test data from test_ind to end
 757     #     if spatial:
 758     #         X_train = [X[i] for i in train_locs]
 759     #         X_val = [X[i] for i in val_locs]
 760     #         X_test = [X[i] for i in test_locs]
 761     #         y_train = [y[i] for i in train_locs]
 762     #         y_val = [y[i] for i in val_locs]
 763     #         y_test = [y[i] for i in test_locs]
 764
 765     #         self.X_train = [Xi[:train_ind] for Xi in X_train]
 766     #         self.y_train = [yi[:train_ind].reshape(-1,1) for yi in y_train]
 767     #         if (val_frac >0) and (val_frac_sp)>0:
 768     #             self.X_val = [Xi[train_ind:test_ind] for Xi in X_val]
 769     #             self.y_val = [yi[train_ind:test_ind].reshape(-1,1) for yi in y_val]
 770     #         self.X_test = [Xi[test_ind:] for Xi in X_test]
 771     #         self.y_test = [yi[test_ind:].reshape(-1,1) for yi in y_test]
 772     #     else:
 773     #         self.X_train = X[:train_ind]
 774     #         self.y_train = y[:train_ind].reshape(-1,1) # assumes y 1-d, change this if vector output
 775     #         if val_frac >0:
 776     #             self.X_val = X[train_ind:test_ind]
 777     #             self.y_val = y[train_ind:test_ind].reshape(-1,1) # assumes y 1-d, change this if vector output
 778     #         self.X_test = X[test_ind:]
 779     #         self.y_test = y[test_ind:].reshape(-1,1) # assumes y 1-d, change this if vector output
 780
 781
 782
 783     #     # Print statements if verbose
 784     #     if verbose:
 785     #         print(f"Train index: 0 to {train_ind}")
 786     #         print(f"Validation index: {train_ind} to {test_ind}")
 787     #         print(f"Test index: {test_ind} to {self.hours}")
 788
 789     #         if spatial:
 790     #             print("Subsetting locations into train/val/test")
 791     #             print(f"Total Locations: {len(locs)}")
 792     #             print(f"Train Locations: {len(train_locs)}")
 793     #             print(f"Val. Locations: {len(val_locs)}")
 794     #             print(f"Test Locations: {len(test_locs)}")
 795     #             print(f"X_train[0] shape: {self.X_train[0].shape}, y_train[0] shape: {self.y_train[0].shape}")
 796     #             print(f"X_val[0] shape: {self.X_val[0].shape}, y_val[0] shape: {self.y_val[0].shape}")
 797     #             print(f"X_test[0] shape: {self.X_test[0].shape}, y_test[0] shape: {self.y_test[0].shape}")
 798     #         else:
 799     #             print(f"X_train shape: {self.X_train.shape}, y_train shape: {self.y_train.shape}")
 800     #             if hasattr(self, "X_val"):
 801     #                 print(f"X_val shape: {self.X_val.shape}, y_val shape: {self.y_val.shape}")
 802     #             print(f"X_test shape: {self.X_test.shape}, y_test shape: {self.y_test.shape}")
 803     def scale_data(self, verbose=True):
 804         """
 805         Scales the training data using the set scaler.
 806
 807         Parameters:
 808         -----------
 809         verbose : bool, optional
 810             If True, prints status messages. Default is True.
 811         """
 812         # Indicate whether multi timeseries or not
 813         spatial = self.spatial
 814         if self.scaler is None:
 815             raise ValueError("Scaler is not set. Use 'set_scaler' method to set a scaler before scaling data.")
 816         if hasattr(self.scaler, 'n_features_in_'):
 817             warnings.warn("Scale_data has already been called. Exiting to prevent issues.")
 818             return
 819         if not hasattr(self, "X_train"):
 820             raise AttributeError("No X_train within object. Run train_test_split first. This is to avoid fitting the scaler with prediction data.")
 821         if verbose:
 822             print(f"Scaling training data with scaler {self.scaler}, fitting on X_train")
 823
 824         if spatial:
 825             # Fit scaler on row-joined training data
 826             self.scaler.fit(np.vstack(self.X_train))
 827             # Transform data using fitted scaler
 828             self.X_train = [self.scaler.transform(Xi) for Xi in self.X_train]
 829             if hasattr(self, 'X_val'):
 830                 self.X_val = [self.scaler.transform(Xi) for Xi in self.X_val]
 831             self.X_test = [self.scaler.transform(Xi) for Xi in self.X_test]
 832         else:
 833             # Fit the scaler on the training data
 834             self.scaler.fit(self.X_train)
 835             # Transform the data using the fitted scaler
 836             self.X_train = self.scaler.transform(self.X_train)
 837             if hasattr(self, 'X_val'):
 838                 self.X_val = self.scaler.transform(self.X_val)
 839             self.X_test = self.scaler.transform(self.X_test)
 840
 841     # NOTE: only works for non spatial
 842     def scale_all_X(self, verbose=True):
 843         """
 844         Scales the all data using the set scaler.
 845
 846         Parameters:
 847         -----------
 848         verbose : bool, optional
 849             If True, prints status messages. Default is True.
 850         Returns:
 851         -------
 852         ndarray
 853             Scaled X matrix, subsetted to features_list.
 854         """
 855         if self.spatial:
 856             raise ValueError("Not implemented for spatial data")
 857
 858         if self.scaler is None:
 859             raise ValueError("Scaler is not set. Use 'set_scaler' method to set a scaler before scaling data.")
 860         if verbose:
 861             print(f"Scaling all X data with scaler {self.scaler}, fitted on X_train")
 862         # Subset features
 863         indices = []
 864         for item in self.features_list:
 865             if item in self.all_features_list:
 866                 indices.append(self.all_features_list.index(item))
 867             else:
 868                 print(f"Warning: feature name '{item}' not found in list of all features from input data")
 869         X = self.X[:, indices]
 870         X = self.scaler.transform(X)
 871
 872         return X
 873
 874     def inverse_scale(self, return_X = 'all_hours', save_changes=False, verbose=True):
 875         """
 876         Inversely scales the data to its original form.
 877
 878         Parameters:
 879         -----------
 880         return_X : str, optional
 881             Specifies what data to return after inverse scaling. Default is 'all_hours'.
 882         save_changes : bool, optional
 883             If True, updates the internal data with the inversely scaled values. Default is False.
 884         verbose : bool, optional
 885             If True, prints status messages. Default is True.
 886         """
 887         if verbose:
 888             print("Inverse scaling data...")
 889         X_train = self.scaler.inverse_transform(self.X_train)
 890         X_val = self.scaler.inverse_transform(self.X_val)
 891         X_test = self.scaler.inverse_transform(self.X_test)
 892
 893         if save_changes:
 894             print("Inverse transformed data saved")
 895             self.X_train = X_train
 896             self.X_val = X_val
 897             self.X_test = X_test
 898         else:
 899             if verbose:
 900                 print("Inverse scaled, but internal data not changed.")
 901         if verbose:
 902             print(f"Attempting to return {return_X}")
 903         if return_X == "all_hours":
 904             return np.concatenate((X_train, X_val, X_test), axis=0)
 905         else:
 906             print(f"Unrecognized or unimplemented return value {return_X}")
 907     def batch_reshape(self, timesteps, batch_size, hours=None, verbose=False):
 908         """
 909         Restructures input data to RNN using batches and sequences.
 910
 911         Parameters:
 912         ----------
 913         batch_size : int
 914             The size of each training batch to reshape the data.
 915         timesteps : int
 916             The number of timesteps or sequence length. Consistitutes a single sample
 917         timesteps : int
 918             Number of timesteps or sequence length used for a single sequence in RNN training. Constitutes a single sample to the model
 919
 920         batch_size : int
 921             Number of sequences used within a batch of training
 922
 923         Returns:
 924         -------
 925         None
 926             This method reshapes the data in place.
 927         Raises:
 928         ------
 929         AttributeError
 930             If either 'X_train' or 'y_train' attributes do not exist within the instance.
 931
 932         Notes:
 933         ------
 934         The reshaping method depends on self param "spatial".
 935         - spatial == False: Reshapes data assuming no spatial dimensions.
 936         - spatial == True: Reshapes data considering spatial dimensions.
 937
 938         """
 939
 940         if not hasattr(self, 'X_train') or not hasattr(self, 'y_train'):
 941             raise AttributeError("Both 'X_train' and 'y_train' must be set before reshaping batches.")
 942
 943         # Indicator of spatial training scheme or not
 944         spatial = self.spatial
 945
 946         if spatial:
 947             print(f"Reshaping spatial training data using batch size: {batch_size} and timesteps: {timesteps}")
 948             self.X_train, self.y_train, self.n_seqs = staircase_spatial(self.X_train, self.y_train, timesteps = timesteps, batch_size=batch_size, hours=hours, verbose=verbose)
 949             if hasattr(self, "X_val"):
 950                 print(f"Reshaping validation data using batch size: {batch_size} and timesteps: {timesteps}")
 951                 self.X_val, self.y_val, _ = staircase_spatial(self.X_val, self.y_val, timesteps = timesteps, batch_size=batch_size, hours=None, verbose=verbose)
 952         else:
 953             print(f"Reshaping training data using batch size: {batch_size} and timesteps: {timesteps}")
 954             self.X_train, self.y_train = staircase_2(self.X_train, self.y_train, timesteps = timesteps, batch_size=batch_size, verbose=verbose)
 955             if hasattr(self, "X_val"):
 956                 print(f"Reshaping validation data using batch size: {batch_size} and timesteps: {timesteps}")
 957                 self.X_val, self.y_val = staircase_2(self.X_val, self.y_val, timesteps = timesteps, batch_size=batch_size, verbose=verbose)
 958
 959     def print_hashes(self, attrs_to_check = ['X', 'y', 'X_train', 'y_train', 'X_val', 'y_val', 'X_test', 'y_test']):
 960         """
 961         Prints the hash of specified data attributes.
 962
 963         Parameters:
 964         -----------
 965         attrs_to_check : list, optional
 966             A list of attribute names to hash and print. Default includes 'X', 'y', and split data.
 967         """
 968         for attr in attrs_to_check:
 969             if hasattr(self, attr):
 970                 value = getattr(self, attr)
 971                 if self.spatial:
 972                     pass
 973                 else:
 974                     print(f"Hash of {attr}: {hash_ndarray(value)}")
 975     def __getattr__(self, key):
 976         """
 977         Allows attribute-style access to dictionary keys, a.k.a. enables the "." operator for get elements
 978         """
 979         try:
 980             return self[key]
 981         except KeyError:
 982             raise AttributeError(f"'rnn_data' object has no attribute '{key}'")
 983
 984     def __setitem__(self, key, value):
 985         """
 986         Ensures dictionary and attribute updates stay in sync for required keys.
 987         """
 988         super().__setitem__(key, value)  # Update the dictionary
 989         if key in self.required_keys:
 990             super().__setattr__(key, value)  # Ensure the attribute is updated as well
 991
 992     def __setattr__(self, key, value):
 993         """
 994         Ensures dictionary keys are updated when setting attributes.
 995         """
 996         self[key] = value
 997
 998
 999 # Function to check reproduciblity hashes, environment info, and model parameters
1000 def check_reproducibility(dict0, params, m_hash, w_hash):
1001     """
1002     Performs reproducibility checks on a model by comparing current settings and outputs with stored reproducibility information.
1003
1004     Parameters:
1005     -----------
1006     dict0 : dict
1007         The data dictionary that should contain reproducibility information under the 'repro_info' attribute.
1008     params : dict
1009         The current model parameters to be checked against the reproducibility information.
1010     m_hash : str
1011         The hash of the current model predictions.
1012     w_hash : str
1013         The hash of the current fitted model weights.
1014
1015     Returns:
1016     --------
1017     None
1018         The function returns None. It issues warnings if any reproducibility checks fail.
1019
1020     Notes:
1021     ------
1022     - Checks are only performed if the `dict0` contains the 'repro_info' attribute.
1023     - Issues warnings for mismatches in model weights, predictions, Python version, TensorFlow version, and model parameters.
1024     - Skips checks if physics-based initialization is used (not implemented).
1025     """
1026     if not hasattr(dict0, "repro_info"):
1027         warnings.warn("The provided data dictionary does not have the required 'repro_info' attribute. Not running reproduciblity checks.")
1028         return
1029
1030     repro_info = dict0.repro_info
1031     # Check Hashes
1032     if params['phys_initialize']:
1033         hashes = repro_info['phys_initialize']
1034         warnings.warn("Physics Initialization not implemented yet. Not running reproduciblity checks.")
1035     else:
1036         hashes = repro_info['rand_initialize']
1037         print(f"Fitted weights hash: {w_hash} \n Reproducibility weights hash: {hashes['fitted_weights_hash']}")
1038         print(f"Model predictions hash: {m_hash} \n Reproducibility preds hash: {hashes['preds_hash']}")
1039         if (w_hash != hashes['fitted_weights_hash']) or (m_hash != hashes['preds_hash']):
1040             if w_hash != hashes['fitted_weights_hash']:
1041                 warnings.warn("The fitted weights hash does not match the reproducibility weights hash.")
1042             if m_hash != hashes['preds_hash']:
1043                 warnings.warn("The predictions hash does not match the reproducibility predictions hash.")
1044         else:
1045             print("***Reproducibility Checks passed - model weights and model predictions match expected.***")
1046
1047     # Check Environment
1048     current_py_version = sys.version[0:6]
1049     current_tf_version = tf.__version__
1050     if current_py_version != repro_info['env_info']['py_version']:
1051         warnings.warn(f"Python version mismatch: Current Python version is {current_py_version}, "
1052                       f"expected {repro_info['env_info']['py_version']}.")
1053
1054     if current_tf_version != repro_info['env_info']['tf_version']:
1055         warnings.warn(f"TensorFlow version mismatch: Current TensorFlow version is {current_tf_version}, "
1056                       f"expected {repro_info['env_info']['tf_version']}.")
1057
1058     # Check Params
1059     repro_params = repro_info.get('params', {})
1060
1061     for key, repro_value in repro_params.items():
1062         if key in params:
1063             if params[key] != repro_value:
1064                 warnings.warn(f"Parameter mismatch for '{key}': Current value is {params[key]}, "
1065                               f"repro value is {repro_value}.")
1066         else:
1067             warnings.warn(f"Parameter '{key}' is missing in the current params.")
1068
1069     return
1070
1071 class RNNModel(ABC):
1072     """
1073     Abstract base class for RNN models, providing structure for training, predicting, and running reproducibility checks.
1074     """
1075     def __init__(self, params: dict):
1076         """
1077         Initializes the RNNModel with the given parameters.
1078
1079         Parameters:
1080         -----------
1081         params : dict
1082             A dictionary containing model parameters.
1083         """
1084         self.params = params
1085         if type(self) is RNNModel:
1086             raise TypeError("MLModel is an abstract class and cannot be instantiated directly")
1087         super().__init__()
1088
1089     @abstractmethod
1090     def _build_model_train(self):
1091         """Abstract method to build the training model."""
1092         pass
1093
1094     @abstractmethod
1095     def _build_model_predict(self, return_sequences=True):
1096         """Abstract method to build the prediction model. This model copies weights from the train model but with input structure that allows for easier prediction of arbitrary length timeseries. This model is not to be used for training, or don't use with .fit calls"""
1097         pass
1098
1099     def is_stateful(self):
1100         """
1101         Checks whether any of the layers in the internal model (self.model_train) are stateful.
1102
1103         Returns:
1104         bool: True if at least one layer in the model is stateful, False otherwise.
1105
1106         This method iterates over all the layers in the model and checks if any of them
1107         have the 'stateful' attribute set to True. This is useful for determining if
1108         the model is designed to maintain state across batches during training.
1109
1110         Example:
1111         --------
1112         model.is_stateful()
1113         """
1114         for layer in self.model_train.layers:
1115             if hasattr(layer, 'stateful') and layer.stateful:
1116                 return True
1117         return False
1118
1119     def fit(self, X_train, y_train, plot_history=True, plot_title = '',
1120             weights=None, callbacks=[], validation_data=None, *args, **kwargs):
1121         """
1122         Trains the model on the provided training data. Uses the fit method of the training model and then copies the weights over to the prediction model, which has a less restrictive input shape. Formats a list of callbacks to use within the fit method based on params input
1123
1124         Parameters:
1125         -----------
1126         X_train : np.ndarray
1127             The input matrix data for training.
1128         y_train : np.ndarray
1129             The target vector data for training.
1130         plot_history : bool, optional
1131             If True, plots the training history. Default is True.
1132         plot_title : str, optional
1133             The title for the training plot. Default is an empty string.
1134         weights : optional
1135             Initial weights for the model. Default is None.
1136         callbacks : list, optional
1137             A list of callback functions to use during training. Default is an empty list.
1138         validation_data : tuple, optional
1139             Validation data to use during training, expected format (X_val, y_val). Default is None.
1140         """
1141         # verbose_fit argument is for printing out update after each epoch, which gets very long
1142         verbose_fit = self.params['verbose_fit']
1143         verbose_weights = self.params['verbose_weights']
1144         if verbose_weights:
1145             print(f"Training simple RNN with params: {self.params}")
1146
1147         # Setup callbacks
1148         if self.params["reset_states"]:
1149             callbacks=callbacks+[ResetStatesCallback(self.params), TerminateOnNaN()]
1150
1151         # Early stopping callback requires validation data
1152         if validation_data is not None:
1153             X_val, y_val =validation_data[0], validation_data[1]
1154             print("Using early stopping callback.")
1155             callbacks=callbacks+[EarlyStoppingCallback(patience = self.params['early_stopping_patience'])]
1156         if verbose_weights:
1157             print(f"Formatted X_train hash: {hash_ndarray(X_train)}")
1158             print(f"Formatted y_train hash: {hash_ndarray(y_train)}")
1159             if validation_data is not None:
1160                 print(f"Formatted X_val hash: {hash_ndarray(X_val)}")
1161                 print(f"Formatted y_val hash: {hash_ndarray(y_val)}")
1162             print(f"Initial weights before training hash: {hash_weights(self.model_train)}")
1163
1164         ## TODO: Hidden State Initialization
1165         # Evaluate Model once to set nonzero initial state
1166         # self.model_train(X_train[0:self.params['batch_size'],:,:])
1167
1168         if validation_data is not None:
1169             history = self.model_train.fit(
1170                 X_train, y_train,
1171                 epochs=self.params['epochs'],
1172                 batch_size=self.params['batch_size'],
1173                 callbacks = callbacks,
1174                 verbose=verbose_fit,
1175                 validation_data = (X_val, y_val),
1176                 *args, **kwargs
1177             )
1178         else:
1179             history = self.model_train.fit(
1180                 X_train, y_train,
1181                 epochs=self.params['epochs'],
1182                 batch_size=self.params['batch_size'],
1183                 callbacks = callbacks,
1184                 verbose=verbose_fit,
1185                 *args, **kwargs
1186             )
1187
1188         if plot_history:
1189             self.plot_history(history,plot_title)
1190
1191         if self.params["verbose_weights"]:
1192             print(f"Fitted Weights Hash: {hash_weights(self.model_train)}")
1193
1194         # Update Weights for Prediction Model
1195         w_fitted = self.model_train.get_weights()
1196         self.model_predict.set_weights(w_fitted)
1197
1198     def predict(self, X_test):
1199         """
1200         Generates predictions on the provided test data using the internal prediction model.
1201
1202         Parameters:
1203         -----------
1204         X_test : np.ndarray
1205             The input data for generating predictions.
1206
1207         Returns:
1208         --------
1209         np.ndarray
1210             The predicted values.
1211         """
1212         print("Predicting test data")
1213         X_test = self._format_pred_data(X_test)
1214         preds = self.model_predict.predict(X_test).flatten()
1215         return preds
1216
1217
1218     def _format_pred_data(self, X):
1219         """
1220         Formats the prediction data for RNN input.
1221
1222         Parameters:
1223         -----------
1224         X : np.ndarray
1225             The input data.
1226
1227         Returns:
1228         --------
1229         np.ndarray
1230             The formatted input data.
1231         """
1232         return np.reshape(X,(1, X.shape[0], self.params['n_features']))
1233
1234     def plot_history(self, history, plot_title, create_figure=True):
1235         """
1236         Plots the training history. Uses log scale on y axis for readability.
1237
1238         Parameters:
1239         -----------
1240         history : History object
1241             The training history object from model fitting. Output of keras' .fit command
1242         plot_title : str
1243             The title for the plot.
1244         """
1245
1246         if create_figure:
1247             plt.figure(figsize=(10, 6))
1248         plt.semilogy(history.history['loss'], label='Training loss')
1249         if 'val_loss' in history.history:
1250             plt.semilogy(history.history['val_loss'], label='Validation loss')
1251         plt.title(f'{plot_title} Model loss')
1252         plt.ylabel('Loss')
1253         plt.xlabel('Epoch')
1254         plt.legend(loc='upper left')
1255         plt.show()
1256
1257     def run_model(self, dict0, reproducibility_run=False, plot_period='all', save_outputs=True):
1258         """
1259         Runs the RNN model on input data dictionary, including training, prediction, and reproducibility checks.
1260
1261         Parameters:
1262         -----------
1263         dict0 : RNNData (dict)
1264             The dictionary containing the input data and configuration.
1265         reproducibility_run : bool, optional
1266             If True, performs reproducibility checks after running the model. Default is False.
1267         save_outputs : bool
1268             If True, writes model outputs into input dictionary.
1269
1270         Returns:
1271         --------
1272         tuple
1273             Model predictions and a dictionary of RMSE errors broken up by time period.
1274         """
1275         verbose_fit = self.params['verbose_fit']
1276         verbose_weights = self.params['verbose_weights']
1277         if verbose_weights:
1278             print("Input data hashes, NOT formatted for rnn sequence/batches yet")
1279             dict0.print_hashes()
1280         # Extract Datasets
1281         X_train, y_train, X_test, y_test = dict0.X_train, dict0.y_train, dict0.X_test, dict0.y_test
1282         if 'X_val' in dict0:
1283             X_val, y_val = dict0.X_val, dict0.y_val
1284         else:
1285             X_val = None
1286         if dict0.spatial:
1287             case_id = "Spatial Training Set"
1288         else:
1289             case_id = dict0.case
1290
1291         # Fit model
1292         if X_val is None:
1293             self.fit(X_train, y_train, plot_title=case_id)
1294         else:
1295             self.fit(X_train, y_train, validation_data = (X_val, y_val), plot_title=case_id)
1296
1297         # Generate Predictions and Evaluate Test Error
1298         if dict0.spatial:
1299             m, errs = self._eval_multi(dict0)
1300             if save_outputs:
1301                 dict0['m']=m
1302         else:
1303             m, errs = self._eval_single(dict0, verbose_weights, reproducibility_run)
1304             if save_outputs:
1305                 dict0['m']=m
1306             plot_data(dict0, title="RNN", title2=dict0.case, plot_period=plot_period)
1307
1308         return m, errs
1309
1310     def _eval_single(self, dict0, verbose_weights, reproducibility_run):
1311         # Generate Predictions,
1312         # run through training to get hidden state set properly for forecast period
1313         print(f"Running prediction on all input data, Training through Test")
1314         X = dict0.scale_all_X()
1315         y = dict0.y.flatten()
1316         # Predict
1317         if verbose_weights:
1318             print(f"All X hash: {hash_ndarray(X)}")
1319
1320         m = self.predict(X).flatten()
1321         if verbose_weights:
1322             print(f"Predictions Hash: {hash_ndarray(m)}")
1323
1324         if reproducibility_run:
1325             print("Checking Reproducibility")
1326             check_reproducibility(dict0, self.params, hash_ndarray(m), hash_weights(self.model_predict))
1327
1328         # print(dict0.keys())
1329         # Plot final fit and data
1330         # dict0['y'] = y
1331         # plot_data(dict0, title="RNN", title2=dict0['case'], plot_period=plot_period)
1332
1333         # Calculate Errors
1334         err = rmse(m, y)
1335         train_ind = dict0.train_ind # index of final training set value
1336         test_ind = dict0.test_ind # index of first test set value
1337
1338         err_train = rmse(m[:train_ind], y[:train_ind].flatten())
1339         err_pred = rmse(m[test_ind:], y[test_ind:].flatten())
1340         rmse_dict = {
1341             'all': err,
1342             'training': err_train,
1343             'prediction': err_pred
1344         }
1345         return m, rmse_dict
1346
1347     def _eval_multi(self, dict0):
1348         # Train Error: NOT DOING YET. DECIDE WHETHER THIS IS NEEDED
1349
1350         # Test Error
1351         new_data = np.stack(dict0.X_test, axis=0)
1352         y_array = np.stack(dict0.y_test, axis=0)
1353         preds = self.model_predict.predict(new_data)
1354
1355         # Calculate RMSE
1356         ## Note: not using util rmse function since this approach is for 3d arrays
1357         # Compute the squared differences
1358         squared_diff = np.square(preds - y_array)
1359
1360         # Mean squared error along the timesteps and dimensions (axis 1 and 2)
1361         mse = np.mean(squared_diff, axis=(1, 2))
1362
1363         # Root mean squared error (RMSE) for each timeseries
1364         rmses = np.sqrt(mse)
1365
1366         return preds, rmses
1367
1368
1369 ## Callbacks
1370
1371 # Helper functions for batch reset schedules
1372 def calc_exp_intervals(bmin, bmax, n_epochs, force_bmax = True):
1373     # Calculate the exponential intervals for each epoch
1374     epochs = np.arange(n_epochs)
1375     factors = epochs / n_epochs
1376     intervals = bmin * (bmax / bmin) ** factors
1377     if force_bmax:
1378         intervals[-1] = bmax  # Ensure the last value is exactly bmax
1379     return intervals.astype(int)
1380
1381 def calc_log_intervals(bmin, bmax, n_epochs, force_bmax = True):
1382     # Calculate the logarithmic intervals for each epoch
1383     epochs = np.arange(n_epochs)
1384     factors = np.log(1 + epochs) / np.log(1 + n_epochs)
1385     intervals = bmin + (bmax - bmin) * factors
1386     if force_bmax:
1387         intervals[-1] = bmax  # Ensure the last value is exactly bmax
1388     return intervals.astype(int)
1389
1390 class ResetStatesCallback(Callback):
1391     """
1392     Custom callback to reset the states of RNN layers at the end of each epoch and optionally after a specified number of batches.
1393
1394     Parameters:
1395     -----------
1396     batch_reset : int, optional
1397         If provided, resets the states of RNN layers after every `batch_reset` batches. Default is None.
1398     """
1399     # def __init__(self, bmin=None, bmax=None, epochs=None, loc_batch_reset = None, batch_schedule_type='linear', verbose=True):
1400     def __init__(self, params=None, verbose=True):
1401         """
1402         Initializes the ResetStatesCallback with an optional batch reset interval.
1403
1404         Parameters:
1405         -----------
1406         params: dict, optional
1407             Dictionary of parameters. If None provided, only on_epoch_end will trigger reset of hidden states.
1408             - bmin : int
1409                 Minimum for batch reset schedule
1410             - bmax : int
1411                 Maximum for batch reset schedule
1412             - epochs : int
1413                 Number of training epochs.
1414             - loc_batch_reset : int
1415                 Interval of batches after which to reset the states of RNN layers for location changes. Triggers reset for training AND validation phases
1416             - batch_schedule_type : str
1417                 Type of batch scheduling to be used. Recognized methods are following:
1418                 - 'constant' : Used fixed batch reset interval throughout training
1419                 - 'linear'   : Increases the batch reset interval linearly over epochs from bmin to bmax.
1420                 - 'exp'      : Increases the batch reset interval exponentially over epochs from bmin to bmax.
1421                 - 'log'      : Increases the batch reset interval logarithmically over epochs from bmin to bmax.
1422
1423
1424         Returns:
1425         -----------
1426         Only in-place reset of hidden states of RNN that calls uses this callback.
1427
1428         """
1429         super(ResetStatesCallback, self).__init__()
1430
1431         # Check for optional arguments, set None if missing in input params
1432         arg_list = ['bmin', 'bmax', 'epochs', 'loc_batch_reset', 'batch_schedule_type']
1433         for arg in arg_list:
1434             setattr(self, arg, params.get(arg, None))
1435
1436         self.verbose = verbose
1437         if self.verbose:
1438             print(f"Using ResetStatesCallback with Batch Reset Schedule: {self.batch_schedule_type}")
1439         # Calculate the reset intervals for each epoch during initialization
1440         if self.batch_schedule_type is not None:
1441             if self.epochs is None:
1442                 raise ValueError(f"Arugment `epochs` cannot be none with self.batch_schedule_type: {self.batch_schedule_type}")
1443             self.batch_reset_intervals = self._calc_reset_intervals(self.batch_schedule_type)
1444             if self.verbose:
1445                 print(f"batch_reset_intervals: {self.batch_reset_intervals}")
1446         else:
1447             self.batch_reset_intervals = None
1448     def on_epoch_end(self, epoch, logs=None):
1449         """
1450         Resets the states of RNN layers at the end of each epoch.
1451
1452         Parameters:
1453         -----------
1454         epoch : int
1455             The index of the current epoch.
1456         logs : dict, optional
1457             A dictionary containing metrics from the epoch. Default is None.
1458         """
1459         # print(f" Resetting hidden state after epoch: {epoch+1}", flush=True)
1460         # Iterate over each layer in the model
1461         for layer in self.model.layers:
1462             # Check if the layer has a reset_states method
1463             if hasattr(layer, 'reset_states'):
1464                 layer.reset_states()
1465     def _calc_reset_intervals(self,batch_schedule_type):
1466         methods = ['constant', 'linear', 'exp', 'log']
1467         if batch_schedule_type not in methods:
1468             raise ValueError(f"Batch schedule method {batch_schedule_type} not recognized. \n Available methods: {methods}")
1469         if batch_schedule_type == "constant":
1470
1471             return np.repeat(self.bmin, self.epochs).astype(int)
1472         elif batch_schedule_type == "linear":
1473             return np.linspace(self.bmin, self.bmax, self.epochs).astype(int)
1474         elif batch_schedule_type == "exp":
1475             return calc_exp_intervals(self.bmin, self.bmax, self.epochs)
1476         elif batch_schedule_type == "log":
1477             return calc_log_intervals(self.bmin, self.bmax, self.epochs)
1478     def on_epoch_begin(self, epoch, logs=None):
1479         # Set the reset interval for the current epoch
1480         if self.batch_reset_intervals is not None:
1481             self.current_batch_reset = self.batch_reset_intervals[epoch]
1482         else:
1483             self.current_batch_reset = None
1484     def on_train_batch_end(self, batch, logs=None):
1485         """
1486         Resets the states of RNN layers during training after a specified number of batches, if `batch_reset` or `loc_batch_reset` are provided. The `batch_reset` is used for stability and to avoid exploding gradients at the beginning of training when a hidden state is being passed with weights that haven't learned yet. The `loc_batch_reset` is used to reset the states when a particular batch is from a new location and thus the hidden state should be passed.
1487
1488         Parameters:
1489         -----------
1490         batch : int
1491             The index of the current batch.
1492         logs : dict, optional
1493             A dictionary containing metrics from the batch. Default is None.
1494         """
1495         batch_reset = self.current_batch_reset
1496         if (batch_reset is not None and batch % batch_reset == 0):
1497             # print(f" Resetting states after batch {batch + 1}")
1498             # Iterate over each layer in the model
1499             for layer in self.model.layers:
1500                 # Check if the layer has a reset_states method
1501                 if hasattr(layer, 'reset_states'):
1502                     layer.reset_states()
1503     def on_test_batch_end(self, batch, logs=None):
1504         """
1505         Resets the states of RNN layers during validation if `loc_batch_reset` is provided to demarcate a new location and thus avoid passing a hidden state to a wrong location.
1506
1507         Parameters:
1508         -----------
1509         batch : int
1510             The index of the current batch.
1511         logs : dict, optional
1512             A dictionary containing metrics from the batch. Default is None.
1513         """
1514         loc_batch_reset = self.loc_batch_reset
1515         if (loc_batch_reset is not None and batch % loc_batch_reset == 0):
1516             # print(f"Resetting states in Validation mode after batch {batch + 1}")
1517             # Iterate over each layer in the model
1518             for layer in self.model.layers:
1519                 # Check if the layer has a reset_states method
1520                 if hasattr(layer, 'reset_states'):
1521                     layer.reset_states()
1522
1523 ## Learning Schedules
1524 ## NOT TESTED YET
1525 lr_schedule = tf.keras.optimizers.schedules.CosineDecay(
1526     initial_learning_rate=0.001,
1527     decay_steps=1000,
1528     alpha=0.0,
1529     name='CosineDecay',
1530     # warmup_target=None,
1531     # warmup_steps=100
1532 )
1533 ##
1534
1535 def EarlyStoppingCallback(patience=5):
1536     """
1537     Creates an EarlyStopping callback with the specified patience.
1538
1539     Args:
1540         patience (int): Number of epochs with no improvement after which training will be stopped.
1541
1542     Returns:
1543         EarlyStopping: Configured EarlyStopping callback.
1544     """
1545     return EarlyStopping(
1546         monitor='val_loss',
1547         patience=patience,
1548         verbose=1,
1549         mode='min',
1550         restore_best_weights=True
1551     )
1552
1553 phys_params = {
1554     'DeltaE': [0,-1],                    # bias correction
1555     'T1': 0.1,                           # 1/fuel class (10)
1556     'fm_raise_vs_rain': 0.2              # fm increase per mm rain
1557 }
1558
1559
1560
1561 def get_initial_weights(model_fit,params,scale_fm=1):
1562     # Given a RNN architecture and hyperparameter dictionary, return array of physics-initiated weights
1563     # Inputs:
1564     # model_fit: output of create_RNN_2 with no training
1565     # params: (dict) dictionary of hyperparameters
1566     # rnn_dat: (dict) data dictionary, output of create_rnn_dat
1567     # Returns: numpy ndarray of weights that should be a rough solution to the moisture ODE
1568     DeltaE = phys_params['DeltaE']
1569     T1 = phys_params['T1']
1570     fmr = phys_params['fm_raise_vs_rain']
1571     centering = params['centering']  # shift activation down
1572
1573     w0_initial={'Ed':(1.-np.exp(-T1))/2,
1574                 'Ew':(1.-np.exp(-T1))/2,
1575                 'rain':fmr * scale_fm}   # wx - input feature
1576                                  #  wh      wb   wd    bd = bias -1
1577
1578     w_initial=np.array([np.nan, np.exp(-0.1), DeltaE[0]/scale_fm, # layer 0
1579                         1.0, -centering[0] + DeltaE[1]/scale_fm])                 # layer 1
1580     if params['verbose_weights']:
1581         print('Equilibrium moisture correction bias',DeltaE[0],
1582               'in the hidden layer and',DeltaE[1],' in the output layer')
1583
1584     w_name = ['wx','wh','bh','wd','bd']
1585
1586     w=model_fit.get_weights()
1587     for j in range(w[0].shape[0]):
1588             feature = params['features_list'][j]
1589             for k in range(w[0].shape[1]):
1590                     w[0][j][k]=w0_initial[feature]
1591     for i in range(1,len(w)):            # number of the weight
1592         for j in range(w[i].shape[0]):   # number of the inputs
1593             if w[i].ndim==2:
1594                 # initialize all entries of the weight matrix to the same number
1595                 for k in range(w[i].shape[1]):
1596                     w[i][j][k]=w_initial[i]/w[i].shape[0]
1597             elif w[i].ndim==1:
1598                 w[i][j]=w_initial[i]
1599             else:
1600                 print('weight',i,'shape',w[i].shape)
1601                 raise ValueError("Only 1 or 2 dimensions supported")
1602         if params['verbose_weights']:
1603             print('weight',i,w_name[i],'shape',w[i].shape,'ndim',w[i].ndim,
1604                   'initial: sum',np.sum(w[i],axis=0),'\nentries',w[i])
1605
1606     return w, w_name
1607
1608 class RNN(RNNModel):
1609     """
1610     A concrete implementation of the RNNModel abstract base class, using simple recurrent cells for hidden recurrent layers.
1611
1612     Parameters:
1613     -----------
1614     params : dict
1615         A dictionary of model parameters.
1616     loss : str, optional
1617         The loss function to use during model training. Default is 'mean_squared_error'.
1618     """
1619     def __init__(self, params, loss='mean_squared_error'):
1620         """
1621         Initializes the RNN model by building the training and prediction models.
1622
1623         Parameters:
1624         -----------
1625         params : dict or RNNParams
1626             A dictionary containing the model's parameters.
1627         loss : str, optional
1628             The loss function to use during model training. Default is 'mean_squared_error'.
1629         """
1630         super().__init__(params)
1631         self.model_train = self._build_model_train()
1632         self.model_predict = self._build_model_predict()
1633
1634     def _build_model_train(self):
1635         """
1636         Builds and compiles the training model, with batch & sequence shape specifications for input.
1637
1638         Returns:
1639         --------
1640         model : tf.keras.Model
1641             The compiled Keras model for training.
1642         """
1643         inputs = tf.keras.Input(batch_shape=self.params['batch_shape'])
1644         x = inputs
1645         for i in range(self.params['rnn_layers']):
1646             # Return sequences True if recurrent layer feeds into another recurrent layer.
1647             # False if feeds into dense layer
1648             return_sequences = True if i < self.params['rnn_layers'] - 1 else False
1649             x = SimpleRNN(
1650                 units=self.params['rnn_units'],
1651                 activation=self.params['activation'][0],
1652                 dropout=self.params["dropout"][0],
1653                 recurrent_dropout = self.params["recurrent_dropout"],
1654                 stateful=self.params['stateful'],
1655                 return_sequences=return_sequences)(x)
1656         if self.params["dropout"][1] > 0:
1657             x = Dropout(self.params["dropout"][1])(x)
1658         for i in range(self.params['dense_layers']):
1659             x = Dense(self.params['dense_units'], activation=self.params['activation'][1])(x)
1660         # Add final output layer, must be 1 dense cell with linear activation if continuous scalar output
1661         x = Dense(units=1, activation='linear')(x)
1662         model = tf.keras.Model(inputs=inputs, outputs=x)
1663         optimizer=tf.keras.optimizers.Adam(learning_rate=self.params['learning_rate'])
1664         model.compile(loss='mean_squared_error', optimizer=optimizer)
1665
1666         if self.params["verbose_weights"]:
1667             print(f"Initial Weights Hash: {hash_weights(model)}")
1668             # print(model.get_weights())
1669
1670         if self.params['phys_initialize']:
1671             assert self.params['scaler'] == 'reproducibility', f"Not implemented yet to do physics initialize with given data scaling {self.params['scaler']}"
1672             assert self.params['features_list'] == ['Ed', 'Ew', 'rain'], f"Physics initiation can only be done with features ['Ed', 'Ew', 'rain'], but given features {self.params['features_list']}"
1673             print("Initializing Model with Physics based weights")
1674             w, w_name=get_initial_weights(model, self.params)
1675             model.set_weights(w)
1676             print('initial weights hash =',hash_weights(model))
1677         return model
1678
1679     def _build_model_predict(self, return_sequences=True):
1680         """
1681         Builds and compiles the prediction model, doesn't use batch shape nor sequence length to make it easier to predict arbitrary number of timesteps. This model has weights copied over from training model is not directly used for training itself.
1682
1683         Parameters:
1684         -----------
1685         return_sequences : bool, optional
1686             Whether to return the full sequence of outputs. Default is True.
1687
1688         Returns:
1689         --------
1690         model : tf.keras.Model
1691             The compiled Keras model for prediction.
1692         """
1693         inputs = tf.keras.Input(shape=(None,self.params['n_features']))
1694         x = inputs
1695         for i in range(self.params['rnn_layers']):
1696             x = SimpleRNN(self.params['rnn_units'],activation=self.params['activation'][0],
1697                   stateful=False,return_sequences=return_sequences)(x)
1698         for i in range(self.params['dense_layers']):
1699             x = Dense(self.params['dense_units'], activation=self.params['activation'][1])(x)
1700         # Add final output layer, must be 1 dense cell with linear activation if continuous scalar output
1701         x = Dense(units=1, activation='linear')(x)
1702         model = tf.keras.Model(inputs=inputs, outputs=x)
1703         optimizer=tf.keras.optimizers.Adam(learning_rate=self.params['learning_rate'])
1704         model.compile(loss='mean_squared_error', optimizer=optimizer)
1705
1706         # Set Weights to model_train
1707         w_fitted = self.model_train.get_weights()
1708         model.set_weights(w_fitted)
1709
1710         return model
1711
1712
1713 class RNN_LSTM(RNNModel):
1714     """
1715     A concrete implementation of the RNNModel abstract base class, use LSTM cells for hidden recurrent layers.
1716
1717     Parameters:
1718     -----------
1719     params : dict
1720         A dictionary of model parameters.
1721     loss : str, optional
1722         The loss function to use during model training. Default is 'mean_squared_error'.
1723     """
1724     def __init__(self, params, loss='mean_squared_error'):
1725         """
1726         Initializes the RNN model by building the training and prediction models.
1727
1728         Parameters:
1729         -----------
1730         params : dict or RNNParams
1731             A dictionary containing the model's parameters.
1732         loss : str, optional
1733             The loss function to use during model training. Default is 'mean_squared_error'.
1734         """
1735         super().__init__(params)
1736         self.model_train = self._build_model_train()
1737         self.model_predict = self._build_model_predict()
1738
1739     def _build_model_train(self):
1740         """
1741         Builds and compiles the training model, with batch & sequence shape specifications for input.
1742
1743         Returns:
1744         --------
1745         model : tf.keras.Model
1746             The compiled Keras model for training.
1747         """
1748         inputs = tf.keras.Input(batch_shape=self.params['batch_shape'])
1749         x = inputs
1750         for i in range(self.params['rnn_layers']):
1751             return_sequences = True if i < self.params['rnn_layers'] - 1 else False
1752             x = LSTM(
1753                 units=self.params['rnn_units'],
1754                 activation=self.params['activation'][0],
1755                 dropout=self.params["dropout"][0],
1756                 recurrent_dropout = self.params["recurrent_dropout"],
1757                 recurrent_activation=self.params["recurrent_activation"],
1758                 stateful=self.params['stateful'],
1759                 return_sequences=return_sequences)(x)
1760         if self.params["dropout"][1] > 0:
1761             x = Dropout(self.params["dropout"][1])(x)
1762         for i in range(self.params['dense_layers']):
1763             x = Dense(self.params['dense_units'], activation=self.params['activation'][1])(x)
1764         model = tf.keras.Model(inputs=inputs, outputs=x)
1765         # optimizer=tf.keras.optimizers.Adam(learning_rate=self.params['learning_rate'], clipvalue=self.params['clipvalue'])
1766         optimizer=tf.keras.optimizers.Adam(learning_rate=self.params['learning_rate'])
1767         model.compile(loss='mean_squared_error', optimizer=optimizer)
1768
1769         if self.params["verbose_weights"]:
1770             print(f"Initial Weights Hash: {hash_weights(model)}")
1771         return model
1772     def _build_model_predict(self, return_sequences=True):
1773         """
1774         Builds and compiles the prediction model, doesn't use batch shape nor sequence length to make it easier to predict arbitrary number of timesteps. This model has weights copied over from training model is not directly used for training itself.
1775
1776         Parameters:
1777         -----------
1778         return_sequences : bool, optional
1779             Whether to return the full sequence of outputs. Default is True.
1780
1781         Returns:
1782         --------
1783         model : tf.keras.Model
1784             The compiled Keras model for prediction.
1785         """
1786         inputs = tf.keras.Input(shape=(None,self.params['n_features']))
1787         x = inputs
1788         for i in range(self.params['rnn_layers']):
1789             x = LSTM(
1790                 units=self.params['rnn_units'],
1791                 activation=self.params['activation'][0],
1792                 stateful=False,return_sequences=return_sequences)(x)
1793         for i in range(self.params['dense_layers']):
1794             x = Dense(self.params['dense_units'], activation=self.params['activation'][1])(x)
1795         model = tf.keras.Model(inputs=inputs, outputs=x)
1796         optimizer=tf.keras.optimizers.Adam(learning_rate=self.params['learning_rate'])
1797         model.compile(loss='mean_squared_error', optimizer=optimizer)
1798
1799         # Set Weights to model_train
1800         w_fitted = self.model_train.get_weights()
1801         model.set_weights(w_fitted)
1802
1803         return model
1804
1805
1806
1807