fmda/moisture_rnn.py

   1 # v2 training and prediction class infrastructure
   2
   3 # Environment
   4 import random
   5 import numpy as np
   6 import pandas as pd
   7 import tensorflow as tf
   8 import matplotlib.pyplot as plt
   9 import sys
  10 from tensorflow.keras.callbacks import Callback, EarlyStopping, TerminateOnNaN
  11 # from sklearn.metrics import mean_squared_error
  12 import logging
  13 from tensorflow.keras.layers import LSTM, SimpleRNN, Input, Dropout, Dense
  14 # Local modules
  15 import reproducibility
  16 # from utils import print_dict_summary
  17 from abc import ABC, abstractmethod
  18 from utils import hash2, all_items_exist, hash_ndarray, hash_weights
  19 from data_funcs import rmse, plot_data, compare_dicts
  20 import copy
  21 # import yaml
  22 from sklearn.preprocessing import MinMaxScaler, StandardScaler
  23 import warnings
  24
  25 #*************************************************************************************
  26 # Data Formatting Functions
  27
  28 def staircase(x,y,timesteps,datapoints,return_sequences=False, verbose = False):
  29     # x [datapoints,features]    all inputs
  30     # y [datapoints,outputs]
  31     # timesteps: split x and y into samples length timesteps, shifted by 1
  32     # datapoints: number of timesteps to use for training, no more than y.shape[0]
  33     if verbose:
  34         print('staircase: shape x = ',x.shape)
  35         print('staircase: shape y = ',y.shape)
  36         print('staircase: timesteps=',timesteps)
  37         print('staircase: datapoints=',datapoints)
  38         print('staircase: return_sequences=',return_sequences)
  39     outputs = y.shape[1]
  40     features = x.shape[1]
  41     samples = datapoints-timesteps+1
  42     if verbose:
  43         print('staircase: samples=',samples,'timesteps=',timesteps,'features=',features)
  44     x_train = np.empty([samples, timesteps, features])
  45     if return_sequences:
  46         if verbose:
  47             print('returning all timesteps in a sample')
  48         y_train = np.empty([samples, timesteps, outputs])  # all
  49         for i in range(samples):
  50             for k in range(timesteps):
  51                 x_train[i,k,:] = x[i+k,:]
  52                 y_train[i,k,:] = y[i+k,:]
  53     else:
  54         if verbose:
  55             print('returning only the last timestep in a sample')
  56         y_train = np.empty([samples, outputs])
  57         for i in range(samples):
  58             for k in range(timesteps):
  59                 x_train[i,k,:] = x[i+k,:]
  60             y_train[i,:] = y[i+timesteps-1,:]
  61
  62     return x_train, y_train
  63
  64 def staircase_2(x,y,timesteps,batch_size=None,trainsteps=np.inf,return_sequences=False, verbose = False):
  65     # create RNN training data in multiple batches
  66     # input:
  67     #     x (,features)
  68     #     y (,outputs)
  69     #     timesteps: split x and y into sequences length timesteps
  70     #                a.k.a. lookback or sequence_length
  71
  72     # print params if verbose
  73
  74     if batch_size is None:
  75         raise ValueError('staircase_2 requires batch_size')
  76     if verbose:
  77         print('staircase_2: shape x = ',x.shape)
  78         print('staircase_2: shape y = ',y.shape)
  79         print('staircase_2: timesteps=',timesteps)
  80         print('staircase_2: batch_size=',batch_size)
  81         print('staircase_2: return_sequences=',return_sequences)
  82
  83     nx,features= x.shape
  84     ny,outputs = y.shape
  85     datapoints = min(nx,ny,trainsteps)
  86     if verbose:
  87         print('staircase_2: datapoints=',datapoints)
  88
  89     # sequence j in a given batch is assumed to be the continuation of sequence j in the previous batch
  90     # https://www.tensorflow.org/guide/keras/working_with_rnns Cross-batch statefulness
  91
  92     # example with timesteps=3 batch_size=3 datapoints=15
  93     #     batch 0: [0 1 2]      [1 2 3]      [2 3 4]
  94     #     batch 1: [3 4 5]      [4 5 6]      [5 6 7]
  95     #     batch 2: [6 7 8]      [7 8 9]      [8 9 10]
  96     #     batch 3: [9 10 11]    [10 11 12]   [11 12 13]
  97     #     batch 4: [12 13 14]   [13 14 15]    when runs out this is the last batch, can be shorter
  98     #
  99     # TODO: implement for multiple locations, same starting time for each batch
 100     #              Loc 1         Loc 2       Loc 3
 101     #     batch 0: [0 1 2]      [0 1 2]      [0 1 2]
 102     #     batch 1: [3 4 5]      [3 4 5]      [3 4 5]
 103     #     batch 2: [6 7 8]      [6 7 8]      [6 7 8]
 104     # TODO: second epoch shift starting time at batch 0 in time
 105
 106     # TODO: implement for multiple locations, different starting times for each batch
 107     #              Loc 1       Loc 2       Loc 3
 108     #     batch 0: [0 1 2]   [1 2 3]      [2 3 4]
 109     #     batch 1: [3 4 5]   [4 5 6]      [5 6 57
 110     #     batch 2: [6 7 8]   [7 8 9]      [8 9 10]
 111
 112     #
 113     #     the first sample in batch j starts from timesteps*j and ends with timesteps*(j+1)-1
 114     #     e.g. the final hidden state of the rnn after the sequence of steps [0 1 2] in batch 0
 115     #     becomes the starting hidden state of the rnn in the sequence of steps [3 4 5] in batch 1, etc.
 116     #
 117     #     sample [0 1 2] means the rnn is used twice to map state 0 -> 1 -> 2
 118     #     the state at time 0 is fixed but the state is considered a variable at times 1 and 2
 119     #     the loss is computed from the output at time 2 and the gradient of the loss function by chain rule which ends at time 0 because the state there is a constant -> derivative is zero
 120     #     sample [3 4 5] means the rnn is used twice to map state 3 -> 4 -> 5    #     the state at time 3 is fixed to the output of the first sequence [0 1 2]
 121     #     the loss is computed from the output at time 5 and the gradient of the loss function by chain rule which ends at time 3 because the state there is considered constant -> derivative is zero
 122     #     how is the gradient computed? I suppose keras adds gradient wrt the weights at 2 5 8 ... 3 6 9... 4 7 ... and uses that to update the weights
 123     #     there is only one set of weights   h(2) = f(h(1),w)  h(1) = f(h(0),w)   but w is always the same
 124     #     each column is a one successive evaluation of h(n+1) = f(h(n),w)  for n = n_startn n_start+1,...
 125     #     the cannot be evaluated efficiently on gpu because gpu is a parallel processor
 126     #     this of it as each column served by one thread, and the threads are independent because they execute in parallel, there needs to be large number of threads (32 is a good number)\
 127     #     each batch consists of independent calculations
 128     #     but it can depend on the result of the previous batch (that's the recurrent parr)
 129
 130
 131
 132     max_batches = datapoints // timesteps
 133     max_sequences = max_batches * batch_size
 134
 135     if verbose:
 136         print('staircase_2: max_batches=',max_batches)
 137         print('staircase_2: max_sequences=',max_sequences)
 138
 139     x_train = np.zeros((max_sequences, timesteps, features))
 140     if return_sequences:
 141         y_train = np.empty((max_sequences, timesteps, outputs))
 142     else:
 143         y_train = np.empty((max_sequences, outputs ))
 144
 145     # build the sequences
 146     k=0
 147     for i in range(max_batches):
 148         for j in range(batch_size):
 149             begin = i*timesteps + j
 150             next  = begin + timesteps
 151             if next > datapoints:
 152                 break
 153             if verbose:
 154                 print('sequence',k,'batch',i,'sample',j,'data',begin,'to',next-1)
 155             x_train[k,:,:] = x[begin:next,:]
 156             if return_sequences:
 157                  y_train[k,:,:] = y[begin:next,:]
 158             else:
 159                  y_train[k,:] = y[next-1,:]
 160             k += 1
 161     if verbose:
 162         print('staircase_2: shape x_train = ',x_train.shape)
 163         print('staircase_2: shape y_train = ',y_train.shape)
 164         print('staircase_2: sequences generated',k)
 165         print('staircase_2: batch_size=',batch_size)
 166     k = (k // batch_size) * batch_size
 167     if verbose:
 168         print('staircase_2: removing partial and empty batches at the end, keeping',k)
 169     x_train = x_train[:k,:,:]
 170     if return_sequences:
 171          y_train = y_train[:k,:,:]
 172     else:
 173          y_train = y_train[:k,:]
 174
 175     if verbose:
 176         print('staircase_2: shape x_train = ',x_train.shape)
 177         print('staircase_2: shape y_train = ',y_train.shape)
 178
 179     return x_train, y_train
 180
 181
 182 # Dictionary of scalers, used to avoid multiple object creation and to avoid multiple if statements
 183 scalers = {
 184     'minmax': MinMaxScaler(),
 185     'standard': StandardScaler()
 186 }
 187
 188
 189 def batch_setup(ids, batch_size):
 190     """
 191     Sets up stateful batched training data scheme for RNN training.
 192
 193     This function takes a list or array of identifiers (`ids`) and divides them into batches of a specified size (`batch_size`). If the last batch does not have enough elements to meet the `batch_size`, the function will loop back to the start of the identifiers and continue filling the batch until it reaches the required size.
 194
 195     Parameters:
 196     -----------
 197     ids : list or numpy array
 198         A list or numpy array containing the ids to be batched.
 199
 200     batch_size : int
 201         The desired size of each batch.
 202
 203     Returns:
 204     --------
 205     batches : list of lists
 206         A list where each element is a batch (itself a list) of identifiers. Each batch will contain exactly `batch_size` elements.
 207
 208     Example:
 209     --------
 210     >>> ids = [1, 2, 3, 4, 5]
 211     >>> batch_size = 3
 212     >>> batch_setup(ids, batch_size)
 213     [[1, 2, 3], [4, 5, 1]]
 214
 215     Notes:
 216     ------
 217     - If `ids` is shorter than `batch_size`, the returned list will contain a single batch where identifiers are repeated from the start of `ids` until the batch is filled.
 218     """
 219     # Ensure ids is a numpy array
 220     x = np.array(ids)
 221
 222     # Initialize the list to hold the batches
 223     batches = []
 224
 225     # Use a loop to slice the list/array into batches
 226     for i in range(0, len(x), batch_size):
 227         batch = list(x[i:i + batch_size])
 228
 229         # If the batch is not full, continue from the start
 230         while len(batch) < batch_size:
 231             # Calculate the remaining number of items needed
 232             remaining = batch_size - len(batch)
 233             # Append the needed number of items from the start of the array
 234             batch.extend(x[:remaining])
 235
 236         batches.append(batch)
 237
 238     return batches
 239
 240 def staircase_spatial(X, y, batch_size, timesteps, hours=None, start_times = None, verbose = True):
 241     """
 242     Prepares spatially formatted time series data for RNN training by creating batches of sequences across different locations, stacked to be compatible with stateful models.
 243
 244     This function processes multi-location time series data by slicing it into batches and formatting it to fit into a recurrent neural network (RNN) model. It utilizes a staircase-like approach to prepare sequences for each location and then interlaces them to align with stateful RNN structures.
 245
 246     Parameters:
 247     -----------
 248     X : list of numpy arrays
 249         A list where each element is a numpy array containing features for a specific location. The shape of each array is `(total_time_steps, features)`.
 250
 251     y : list of numpy arrays
 252         A list where each element is a numpy array containing the target values for a specific location. The shape of each array is `(total_time_steps,)`.
 253
 254     batch_size : int
 255         The number of sequences to include in each batch.
 256
 257     timesteps : int
 258         The number of time steps to include in each sequence for the RNN.
 259
 260     hours : int, optional
 261         The length of each time series to consider for each location. If `None`, it defaults to the minimum length of `y` across all locations.
 262
 263     start_times : numpy array, optional
 264         The initial time step for each location. If `None`, it defaults to an array starting from 0 and incrementing by 1 for each location.
 265
 266     verbose : bool, optional
 267         If `True`, prints additional information during processing. Default is `True`.
 268
 269     Returns:
 270     --------
 271     XX : numpy array
 272         A 3D numpy array with shape `(total_sequences, timesteps, features)` containing the prepared feature sequences for all locations.
 273
 274     yy : numpy array
 275         A 2D numpy array with shape `(total_sequences, 1)` containing the corresponding target values for all locations.
 276
 277     n_seqs : int
 278         Number of sequences per location. Used to reset states when location changes. Hidden state of RNN will be reset after n_seqs number of batches
 279
 280     Notes:
 281     ------
 282     - The function handles spatially distributed time series data by batching and formatting it for stateful RNNs.
 283     - `hours` determines how much of the time series is used for each location. If not provided, it defaults to the shortest series in `y`.
 284     - If `start_times` is not provided, it assumes each location starts its series at progressively later time steps.
 285     - The `batch_setup` function is used internally to manage the creation of location and time step batches.
 286     - The returned feature sequences `XX` and target sequences `yy` are interlaced to align with the expected input format of stateful RNNs.
 287     """
 288
 289     # Generate ids based on number of distinct timeseries provided
 290     n_loc = len(y) # assuming each list entry for y is a separate location
 291     loc_ids = np.arange(n_loc)
 292
 293     # Generate hours and start_times if None
 294     if hours is None:
 295         print("Setting total hours to minimum length of y in provided dictionary")
 296         hours = min(len(yi) for yi in y)
 297     if start_times is None:
 298         print("Setting Start times to offset by 1 hour by location")
 299         start_times = np.arange(n_loc)
 300     # Set up batches
 301     loc_batch, t_batch =  batch_setup(loc_ids, batch_size), batch_setup(start_times, batch_size)
 302     if verbose:
 303         print(f"Location ID Batches: {loc_batch}")
 304         print(f"Start Times for Batches: {t_batch}")
 305
 306     # Loop over batches and construct with staircase_2
 307     Xs = []
 308     ys = []
 309     for i in range(0, len(loc_batch)):
 310         locs_i = loc_batch[i]
 311         ts = t_batch[i]
 312         for j in range(0, len(locs_i)):
 313             t0 = ts[j]
 314             tend = t0 + hours
 315             # Create RNNData Dict
 316             # Subset data to given location and time from t0 to t0+hours
 317             k = locs_i[j] # Used to account for fewer locations than batch size
 318             X_temp = X[k][t0:tend,:]
 319             y_temp = y[k][t0:tend].reshape(-1,1)
 320
 321             # Format sequences
 322             Xi, yi = staircase_2(
 323                 X_temp,
 324                 y_temp,
 325                 timesteps = timesteps,
 326                 batch_size = 1,  # note: using 1 here to format sequences for a single location, not same as target batch size for training data
 327                 verbose=False)
 328
 329             Xs.append(Xi)
 330             ys.append(yi)
 331
 332     # Drop incomplete batches
 333     lens = [yi.shape[0] for yi in ys]
 334     n_seqs = min(lens)
 335     if verbose:
 336         print(f"Minimum number of sequences by location: {n_seqs}")
 337         print(f"Applying minimum length to other arrays.")
 338     Xs = [Xi[:n_seqs] for Xi in Xs]
 339     ys = [yi[:n_seqs] for yi in ys]
 340
 341     # Interlace arrays to match stateful structure
 342     n_features = Xi.shape[2]
 343     XXs = []
 344     yys = []
 345     for i in range(0, len(loc_batch)):
 346         locs_i = loc_batch[i]
 347         XXi = np.empty((Xs[0].shape[0]*batch_size, 5, n_features))
 348         yyi = np.empty((Xs[0].shape[0]*batch_size, 1))
 349         for j in range(0, len(locs_i)):
 350             XXi[j::(batch_size)] =  Xs[locs_i[j]]
 351             yyi[j::(batch_size)] =  ys[locs_i[j]]
 352         XXs.append(XXi)
 353         yys.append(yyi)
 354     yy = np.concatenate(yys, axis=0)
 355     XX = np.concatenate(XXs, axis=0)
 356
 357     if verbose:
 358         print(f"Spatially Formatted X Shape: {XX.shape}")
 359         print(f"Spatially Formatted X Shape: {yy.shape}")
 360
 361
 362     return XX, yy, n_seqs
 363
 364 #***********************************************************************************************
 365 ### RNN Class Functionality
 366
 367 class RNNParams(dict):
 368     """
 369     A custom dictionary class for handling RNN parameters. Automatically calculates certain params based on others. Overwrites the update method to protect from incompatible parameter choices. Inherits from dict
 370     """
 371     def __init__(self, input_dict):
 372         """
 373         Initializes the RNNParams instance and runs checks and shape calculations.
 374
 375         Parameters:
 376         -----------
 377         input_dict : dict,
 378             A dictionary containing RNN parameters.
 379         """
 380         super().__init__(input_dict)
 381         # Automatically run checks on initialization
 382         self.run_checks()
 383         # Automatically calculate shapes on initialization
 384         self.calc_param_shapes()
 385     def run_checks(self, verbose=True):
 386         """
 387         Validates that required keys exist and are of the correct type.
 388
 389         Parameters:
 390         -----------
 391         verbose : bool, optional
 392             If True, prints status messages. Default is True.
 393         """
 394         print("Checking params...")
 395         # Keys must exist and be integers
 396         int_keys = [
 397             'batch_size', 'timesteps', 'rnn_layers',
 398             'rnn_units', 'dense_layers', 'dense_units', 'epochs'
 399         ]
 400
 401         for key in int_keys:
 402             assert key in self, f"Missing required key: {key}"
 403             assert isinstance(self[key], int), f"Key '{key}' must be an integer"
 404
 405         # Keys must exist and be lists
 406         list_keys = ['activation', 'features_list', 'dropout', 'time_fracs']
 407         for key in list_keys:
 408             assert key in self, f"Missing required key: {key}"
 409             assert isinstance(self[key], list), f"Key '{key}' must be a list"
 410
 411         # Keys must exist and be floats
 412         float_keys = ['learning_rate']
 413         for key in float_keys:
 414             assert key in self, f"Missing required key: {key}"
 415             assert isinstance(self[key], float), f"Key '{key}' must be a float"
 416
 417         print("Input dictionary passed all checks.")
 418     def calc_param_shapes(self, verbose=True):
 419         """
 420         Calculates and updates the shapes of certain parameters based on input data.
 421
 422         Parameters:
 423         -----------
 424         verbose : bool, optional
 425             If True, prints status messages. Default is True.
 426         """
 427         if verbose:
 428             print("Calculating shape params based on features list, timesteps, and batch size")
 429             print(f"Input Feature List: {self['features_list']}")
 430             print(f"Input Timesteps: {self['timesteps']}")
 431             print(f"Input Batch Size: {self['batch_size']}")
 432
 433         n_features = len(self['features_list'])
 434         batch_shape = (self["batch_size"], self["timesteps"], n_features)
 435         if verbose:
 436             print("Calculated params:")
 437             print(f"Number of features: {n_features}")
 438             print(f"Batch Shape: {batch_shape}")
 439
 440         # Update the dictionary
 441         super().update({
 442             'n_features': n_features,
 443             'batch_shape': batch_shape
 444         })
 445         if verbose:
 446             print(self)
 447
 448     def update(self, *args, verbose=True, **kwargs):
 449         """
 450         Overwrites the standard update functon from dict. This is to prevent certain keys from being modified directly and to automatically update keys to be compatible with each other. The keys handled relate to the shape of the input data to the RNN.
 451
 452         Parameters:
 453         -----------
 454         verbose : bool, optional
 455             If True, prints status messages. Default is True.
 456         """
 457         # Prevent updating n_features and batch_shape
 458         restricted_keys = {'n_features', 'batch_shape'}
 459         keys_to_check = {'features_list', 'timesteps', 'batch_size'}
 460
 461         # Check for restricted keys in args
 462         if args:
 463             if isinstance(args[0], dict):
 464                 if restricted_keys & args[0].keys():
 465                     raise KeyError(f"Cannot directly update keys: {restricted_keys & args[0].keys()}, \n Instead update one of: {keys_to_check}")
 466             elif isinstance(args[0], (tuple, list)) and all(isinstance(i, tuple) and len(i) == 2 for i in args[0]):
 467                 if restricted_keys & {k for k, v in args[0]}:
 468                     raise KeyError(f"Cannot directly update keys: {restricted_keys & {k for k, v in args[0]}}, \n Instead update one of: {keys_to_check}")
 469
 470         # Check for restricted keys in kwargs
 471         if restricted_keys & kwargs.keys():
 472             raise KeyError(f"Cannot update restricted keys: {restricted_keys & kwargs.keys()}")
 473
 474
 475         # Track if specific keys are updated
 476         keys_updated = set()
 477
 478         # Update using the standard dict update method
 479         if args:
 480             if isinstance(args[0], dict):
 481                 keys_updated.update(args[0].keys() & keys_to_check)
 482             elif isinstance(args[0], (tuple, list)) and all(isinstance(i, tuple) and len(i) == 2 for i in args[0]):
 483                 keys_updated.update(k for k, v in args[0] if k in keys_to_check)
 484
 485         if kwargs:
 486             keys_updated.update(kwargs.keys() & keys_to_check)
 487
 488         # Call the parent update method
 489         super().update(*args, **kwargs)
 490
 491         # Recalculate shapes if necessary
 492         if keys_updated:
 493             self.calc_param_shapes(verbose=verbose)
 494
 495
 496 ## Class for handling input data
 497 class RNNData(dict):
 498     """
 499     A custom dictionary class for managing RNN data, with validation, scaling, and train-test splitting functionality.
 500     """
 501     required_keys = {"loc", "time", "X", "y", "features_list"}
    def __init__(self, input_dict, scaler=None, features_list=None):
        """
        Initializes the RNNData instance, performs checks, and prepares data.

        Detects whether the input holds a single timeseries or several (based on the type of
        `loc['STID']`), optionally sets a scaler, stores the number of hours under 'hours',
        moves the input 'features_list' to 'all_features_list', and sets the features to use.

        Parameters:
        -----------
        input_dict : dict
            A dictionary containing the initial data.
        scaler : str, optional
            The name of the scaler to be used (e.g., 'minmax', 'standard'). Default is None.
        features_list : list, optional
            A subset of features to be used. Default is None which means all features.

        Raises:
        -------
        KeyError
            If `loc['STID']` is neither a str nor a list.
        """

        # Copy to avoid changing external input
        # NOTE(review): for a plain dict this is a shallow copy, so nested objects are still shared — confirm this is sufficient
        input_data = input_dict.copy()
        # Initialize inherited dict class
        super().__init__(input_data)

        # Check if input data is one timeseries dataset or multiple
        # NOTE(review): attribute-style access (self.loc, self.y, ...) is used here before the
        # `self.__dict__.update(self)` call at the end of this method, so it presumably relies on
        # attribute hooks defined elsewhere in this class — confirm.
        # The type comparison is exact: subclasses of str or list fall through to the KeyError.
        if type(self.loc['STID']) == str:
            self.spatial = False
            print("Input data is single timeseries.")
        elif type(self.loc['STID']) == list:
            self.spatial = True
            print("Input data from multiple timeseries.")
        else:
            raise KeyError(f"Input locations not list or single string")

        # Set up Data Scaling
        self.scaler = None
        if scaler is not None:
            self.set_scaler(scaler)

        # Rename and define other stuff.
        if self.spatial:
            # Multiple timeseries: use the length of the shortest one
            self['hours'] = min(arr.shape[0] for arr in self.y)
        else:
            self['hours'] = len(self['y'])

        # Keep the full input feature list under a separate key; 'features_list' then holds the subset in use
        self['all_features_list'] = self.pop('features_list')
        if features_list is None:
            print("Using all input features.")
            self.features_list = self.all_features_list
        else:
            self.features_list = features_list
        # self.run_checks()
        # Mirror the dictionary items into the instance attributes
        self.__dict__.update(self)
 550
 551     # TODO: Fix checks for multilocation
 552     def run_checks(self, verbose=True):
 553         """
 554         Validates that required keys are present and checks the integrity of data shapes.
 555
 556         Parameters:
 557         -----------
 558         verbose : bool, optional
 559             If True, prints status messages. Default is True.
 560         """
 561         missing_keys = self.required_keys - self.keys()
 562         if missing_keys:
 563             raise KeyError(f"Missing required keys: {missing_keys}")
 564         # # Check y 1-d
 565         # y_shape = np.shape(self.y)
 566         # if not (len(y_shape) == 1 or (len(y_shape) == 2 and y_shape[1] == 1)):
 567         #     raise ValueError(f"'y' must be one-dimensional, with shape (N,) or (N, 1). Current shape is {y_shape}.")
 568
 569         # # Check if 'hours' is provided and matches len(y)
 570         # if 'hours' in self:
 571         #     if self.hours != len(self.y):
 572         #         raise ValueError(f"Provided 'hours' value {self.hours} does not match the length of 'y', which is {len(self.y)}.")
 573         # Check desired subset of features is in all input features
 574         if not all_items_exist(self.features_list, self.all_features_list):
 575             raise ValueError(f"Provided 'features_list' {self.features_list} has elements not in input features.")
 576     def set_scaler(self, scaler):
 577         """
 578         Sets the scaler to be used for data normalization.
 579
 580         Parameters:
 581         -----------
 582         scaler : str
 583             The name of the scaler (e.g., 'minmax', 'standard').
 584         """
 585         recognized_scalers = ['minmax', 'standard']
 586         if scaler in recognized_scalers:
 587             print(f"Setting data scaler: {scaler}")
 588             self.scaler = scalers[scaler]
 589         else:
 590             raise ValueError(f"Unrecognized scaler '{scaler}'. Recognized scalers are: {recognized_scalers}.")
 591     def train_test_split(self, time_fracs=[1.,0.,0.], space_fracs=[1.,0.,0.], subset_features=True, features_list=None, verbose=True):
 592         """
 593         Splits the data into training, validation, and test sets.
 594
 595         Parameters:
 596         -----------
 597         train_frac : float
 598             The fraction of data to be used for training.
 599         val_frac : float, optional
 600             The fraction of data to be used for validation. Default is 0.0.
 601         subset_features : bool, optional
 602             If True, subsets the data to the specified features list. Default is True.
 603         features_list : list, optional
 604             A list of features to use for subsetting. Default is None.
 605         split_space : bool, optional
 606             Whether to split the data based on space. Default is False.
 607         verbose : bool, optional
 608             If True, prints status messages. Default is True.
 609         """
 610         # Indicate whether multi timeseries or not
 611         spatial = self.spatial
 612
 613         # Set up
 614         assert np.sum(time_fracs) == np.sum(space_fracs) == 1., f"Provided cross validation params don't sum to 1"
 615         if (len(time_fracs) != 3) or (len(space_fracs) != 3):
 616             raise ValueError("Cross-validation params `time_fracs` and `space_fracs` must be lists of length 3, representing (train/validation/test)")
 617
 618         train_frac = time_fracs[0]
 619         val_frac = time_fracs[1]
 620         test_frac = time_fracs[2]
 621
 622         # Setup train/val/test in time
 623         train_ind = int(np.floor(self.hours * train_frac)); self.train_ind = train_ind
 624         test_ind= int(train_ind + round(self.hours * val_frac)); self.test_ind = test_ind
 625         # Check for any potential issues with indices
 626         if test_ind > self.hours:
 627             print(f"Setting test index to {self.hours}")
 628             test_ind = self.hours
 629         if train_ind > test_ind:
 630             raise ValueError("Train index must be less than test index.")
 631
 632         # Setup train/val/test in space
 633         if spatial:
 634             train_frac_sp = space_fracs[0]
 635             val_frac_sp = space_fracs[1]
 636             locs = np.arange(len(self.loc['STID'])) # indices of locations
 637             train_size = int(len(locs) * train_frac_sp)
 638             val_size = int(len(locs) * val_frac_sp)
 639             random.shuffle(locs)
 640             train_locs = locs[:train_size]
 641             val_locs = locs[train_size:train_size + val_size]
 642             test_locs = locs[train_size + val_size:]
 643             # Store Lists of IDs in loc subdirectory
 644             self.loc['train_locs'] = [self.case[i] for i in train_locs]
 645             self.loc['val_locs'] = [self.case[i] for i in val_locs]
 646             self.loc['test_locs'] = [self.case[i] for i in test_locs]
 647
 648
 649         # Extract data to desired features, copy to avoid changing input objects
 650         X = self.X.copy()
 651         y = self.y.copy()
 652         if subset_features:
 653             if verbose and self.features_list != self.all_features_list:
 654                 print(f"Subsetting input data to features_list: {self.features_list}")
 655             # Indices to subset all features with based on params features
 656             indices = []
 657             for item in self.features_list:
 658                 if item in self.all_features_list:
 659                     indices.append(self.all_features_list.index(item))
 660                 else:
 661                     print(f"Warning: feature name '{item}' not found in list of all features from input data")
 662             if spatial:
 663                 X = [Xi[:, indices] for Xi in X]
 664             else:
 665                 X = X[:, indices]
 666
 667         # Training data from 0 to train_ind
 668         # Validation data from train_ind to test_ind
 669         # Test data from test_ind to end
 670         if spatial:
 671             X_train = [X[i] for i in train_locs]
 672             X_val = [X[i] for i in val_locs]
 673             X_test = [X[i] for i in test_locs]
 674             y_train = [y[i] for i in train_locs]
 675             y_val = [y[i] for i in val_locs]
 676             y_test = [y[i] for i in test_locs]
 677
 678             self.X_train = [Xi[:train_ind] for Xi in X_train]
 679             self.y_train = [yi[:train_ind].reshape(-1,1) for yi in y_train]
 680             if (val_frac >0) and (val_frac_sp)>0:
 681                 self.X_val = [Xi[train_ind:test_ind] for Xi in X_val]
 682                 self.y_val = [yi[train_ind:test_ind].reshape(-1,1) for yi in y_val]
 683             self.X_test = [Xi[test_ind:] for Xi in X_test]
 684             self.y_test = [yi[test_ind:].reshape(-1,1) for yi in y_test]
 685         else:
 686             self.X_train = X[:train_ind]
 687             self.y_train = y[:train_ind].reshape(-1,1) # assumes y 1-d, change this if vector output
 688             if val_frac >0:
 689                 self.X_val = X[train_ind:test_ind]
 690                 self.y_val = y[train_ind:test_ind].reshape(-1,1) # assumes y 1-d, change this if vector output
 691             self.X_test = X[test_ind:]
 692             self.y_test = y[test_ind:].reshape(-1,1) # assumes y 1-d, change this if vector output
 693
 694
 695
 696         # Print statements if verbose
 697         if verbose:
 698             print(f"Train index: 0 to {train_ind}")
 699             print(f"Validation index: {train_ind} to {test_ind}")
 700             print(f"Test index: {test_ind} to {self.hours}")
 701
 702             if spatial:
 703                 print("Subsetting locations into train/val/test")
 704                 print(f"Total Locations: {len(locs)}")
 705                 print(f"Train Locations: {len(train_locs)}")
 706                 print(f"Val. Locations: {len(val_locs)}")
 707                 print(f"Test Locations: {len(test_locs)}")
 708                 print(f"X_train[0] shape: {self.X_train[0].shape}, y_train[0] shape: {self.y_train[0].shape}")
 709                 print(f"X_val[0] shape: {self.X_val[0].shape}, y_val[0] shape: {self.y_val[0].shape}")
 710                 print(f"X_test[0] shape: {self.X_test[0].shape}, y_test[0] shape: {self.y_test[0].shape}")
 711             else:
 712                 print(f"X_train shape: {self.X_train.shape}, y_train shape: {self.y_train.shape}")
 713                 if hasattr(self, "X_val"):
 714                     print(f"X_val shape: {self.X_val.shape}, y_val shape: {self.y_val.shape}")
 715                 print(f"X_test shape: {self.X_test.shape}, y_test shape: {self.y_test.shape}")
 716     def scale_data(self, verbose=True):
 717         """
 718         Scales the training data using the set scaler.
 719
 720         Parameters:
 721         -----------
 722         verbose : bool, optional
 723             If True, prints status messages. Default is True.
 724         """
 725         # Indicate whether multi timeseries or not
 726         spatial = self.spatial
 727         if self.scaler is None:
 728             raise ValueError("Scaler is not set. Use 'set_scaler' method to set a scaler before scaling data.")
 729         # if hasattr(self.scaler, 'n_features_in_'):
 730         #     warnings.warn("Scale_data has already been called. Exiting to prevent issues.")
 731         #     return
 732         if not hasattr(self, "X_train"):
 733             raise AttributeError("No X_train within object. Run train_test_split first. This is to avoid fitting the scaler with prediction data.")
 734         if verbose:
 735             print(f"Scaling training data with scaler {self.scaler}, fitting on X_train")
 736
 737         if spatial:
 738             # Fit scaler on row-joined training data
 739             self.scaler.fit(np.vstack(self.X_train))
 740             # Transform data using fitted scaler
 741             self.X_train = [self.scaler.transform(Xi) for Xi in self.X_train]
 742             if hasattr(self, 'X_val'):
 743                 self.X_val = [self.scaler.transform(Xi) for Xi in self.X_val]
 744             self.X_test = [self.scaler.transform(Xi) for Xi in self.X_test]
 745         else:
 746             # Fit the scaler on the training data
 747             self.scaler.fit(self.X_train)
 748             # Transform the data using the fitted scaler
 749             self.X_train = self.scaler.transform(self.X_train)
 750             if hasattr(self, 'X_val'):
 751                 self.X_val = self.scaler.transform(self.X_val)
 752             self.X_test = self.scaler.transform(self.X_test)
 753
 754     # NOTE: only works for non spatial
 755     def scale_all_X(self, verbose=True):
 756         """
 757         Scales the all data using the set scaler.
 758
 759         Parameters:
 760         -----------
 761         verbose : bool, optional
 762             If True, prints status messages. Default is True.
 763         Returns:
 764         -------
 765         ndarray
 766             Scaled X matrix, subsetted to features_list.
 767         """
 768         if self.spatial:
 769             raise ValueError("Not implemented for spatial data")
 770
 771         if self.scaler is None:
 772             raise ValueError("Scaler is not set. Use 'set_scaler' method to set a scaler before scaling data.")
 773         if verbose:
 774             print(f"Scaling all X data with scaler {self.scaler}, fitted on X_train")
 775         # Subset features
 776         indices = []
 777         for item in self.features_list:
 778             if item in self.all_features_list:
 779                 indices.append(self.all_features_list.index(item))
 780             else:
 781                 print(f"Warning: feature name '{item}' not found in list of all features from input data")
 782         X = self.X[:, indices]
 783         X = self.scaler.transform(X)
 784
 785         return X
 786
 787     def inverse_scale(self, return_X = 'all_hours', save_changes=False, verbose=True):
 788         """
 789         Inversely scales the data to its original form.
 790
 791         Parameters:
 792         -----------
 793         return_X : str, optional
 794             Specifies what data to return after inverse scaling. Default is 'all_hours'.
 795         save_changes : bool, optional
 796             If True, updates the internal data with the inversely scaled values. Default is False.
 797         verbose : bool, optional
 798             If True, prints status messages. Default is True.
 799         """
 800         if verbose:
 801             print("Inverse scaling data...")
 802         X_train = self.scaler.inverse_transform(self.X_train)
 803         X_val = self.scaler.inverse_transform(self.X_val)
 804         X_test = self.scaler.inverse_transform(self.X_test)
 805
 806         if save_changes:
 807             print("Inverse transformed data saved")
 808             self.X_train = X_train
 809             self.X_val = X_val
 810             self.X_test = X_test
 811         else:
 812             if verbose:
 813                 print("Inverse scaled, but internal data not changed.")
 814         if verbose:
 815             print(f"Attempting to return {return_X}")
 816         if return_X == "all_hours":
 817             return np.concatenate((X_train, X_val, X_test), axis=0)
 818         else:
 819             print(f"Unrecognized or unimplemented return value {return_X}")
 820     def batch_reshape(self, timesteps, batch_size, hours=None, verbose=False, start_times=None):
 821         """
 822         Restructures input data to RNN using batches and sequences.
 823
 824         Parameters:
 825         ----------
 826         batch_size : int
 827             The size of each training batch to reshape the data.
 828         timesteps : int
 829             The number of timesteps or sequence length. Consistitutes a single sample
 830         timesteps : int
 831             Number of timesteps or sequence length used for a single sequence in RNN training. Constitutes a single sample to the model
 832
 833         batch_size : int
 834             Number of sequences used within a batch of training
 835
 836         Returns:
 837         -------
 838         None
 839             This method reshapes the data in place.
 840         Raises:
 841         ------
 842         AttributeError
 843             If either 'X_train' or 'y_train' attributes do not exist within the instance.
 844
 845         Notes:
 846         ------
 847         The reshaping method depends on self param "spatial".
 848         - spatial == False: Reshapes data assuming no spatial dimensions.
 849         - spatial == True: Reshapes data considering spatial dimensions.
 850
 851         """
 852
 853         if not hasattr(self, 'X_train') or not hasattr(self, 'y_train'):
 854             raise AttributeError("Both 'X_train' and 'y_train' must be set before reshaping batches.")
 855
 856         # Indicator of spatial training scheme or not
 857         spatial = self.spatial
 858
 859         if spatial:
 860             print(f"Reshaping spatial training data using batch size: {batch_size} and timesteps: {timesteps}")
 861             self.X_train, self.y_train, self.n_seqs = staircase_spatial(self.X_train, self.y_train, timesteps = timesteps, batch_size=batch_size, hours=hours, verbose=verbose, start_times=start_times)
 862             if hasattr(self, "X_val"):
 863                 print(f"Reshaping validation data using batch size: {batch_size} and timesteps: {timesteps}")
 864                 self.X_val, self.y_val, _ = staircase_spatial(self.X_val, self.y_val, timesteps = timesteps, batch_size=batch_size, hours=None, verbose=verbose, start_times=start_times)
 865         else:
 866             print(f"Reshaping training data using batch size: {batch_size} and timesteps: {timesteps}")
 867             self.X_train, self.y_train = staircase_2(self.X_train, self.y_train, timesteps = timesteps, batch_size=batch_size, verbose=verbose)
 868             if hasattr(self, "X_val"):
 869                 print(f"Reshaping validation data using batch size: {batch_size} and timesteps: {timesteps}")
 870                 self.X_val, self.y_val = staircase_2(self.X_val, self.y_val, timesteps = timesteps, batch_size=batch_size, verbose=verbose)
 871         if self.X_train.shape[0] == 0:
 872             raise ValueError("X_train has zero rows. Try different combo of cross-validation fractions, batch size or start_times. Train/val/test data partially processed, need to return train_test_split")
 873
 874     def print_hashes(self, attrs_to_check = ['X', 'y', 'X_train', 'y_train', 'X_val', 'y_val', 'X_test', 'y_test']):
 875         """
 876         Prints the hash of specified data attributes.
 877
 878         Parameters:
 879         -----------
 880         attrs_to_check : list, optional
 881             A list of attribute names to hash and print. Default includes 'X', 'y', and split data.
 882         """
 883         for attr in attrs_to_check:
 884             if hasattr(self, attr):
 885                 value = getattr(self, attr)
 886                 if self.spatial:
 887                     pass
 888                 else:
 889                     print(f"Hash of {attr}: {hash_ndarray(value)}")
 890     def __getattr__(self, key):
 891         """
 892         Allows attribute-style access to dictionary keys, a.k.a. enables the "." operator for get elements
 893         """
 894         try:
 895             return self[key]
 896         except KeyError:
 897             raise AttributeError(f"'rnn_data' object has no attribute '{key}'")
 898
 899     def __setitem__(self, key, value):
 900         """
 901         Ensures dictionary and attribute updates stay in sync for required keys.
 902         """
 903         super().__setitem__(key, value)  # Update the dictionary
 904         if key in self.required_keys:
 905             super().__setattr__(key, value)  # Ensure the attribute is updated as well
 906
 907     def __setattr__(self, key, value):
 908         """
 909         Ensures dictionary keys are updated when setting attributes.
 910         """
 911         self[key] = value
 912
 913
# Function to check reproducibility hashes, environment info, and model parameters
 915 def check_reproducibility(dict0, params, m_hash, w_hash):
 916     """
 917     Performs reproducibility checks on a model by comparing current settings and outputs with stored reproducibility information.
 918
 919     Parameters:
 920     -----------
 921     dict0 : dict
 922         The data dictionary that should contain reproducibility information under the 'repro_info' attribute.
 923     params : dict
 924         The current model parameters to be checked against the reproducibility information.
 925     m_hash : str
 926         The hash of the current model predictions.
 927     w_hash : str
 928         The hash of the current fitted model weights.
 929
 930     Returns:
 931     --------
 932     None
 933         The function returns None. It issues warnings if any reproducibility checks fail.
 934
 935     Notes:
 936     ------
 937     - Checks are only performed if the `dict0` contains the 'repro_info' attribute.
 938     - Issues warnings for mismatches in model weights, predictions, Python version, TensorFlow version, and model parameters.
 939     - Skips checks if physics-based initialization is used (not implemented).
 940     """
 941     if not hasattr(dict0, "repro_info"):
 942         warnings.warn("The provided data dictionary does not have the required 'repro_info' attribute. Not running reproduciblity checks.")
 943         return
 944
 945     repro_info = dict0.repro_info
 946     # Check Hashes
 947     if params['phys_initialize']:
 948         hashes = repro_info['phys_initialize']
 949         warnings.warn("Physics Initialization not implemented yet. Not running reproduciblity checks.")
 950     else:
 951         hashes = repro_info['rand_initialize']
 952         print(f"Fitted weights hash: {w_hash} \n Reproducibility weights hash: {hashes['fitted_weights_hash']}")
 953         print(f"Model predictions hash: {m_hash} \n Reproducibility preds hash: {hashes['preds_hash']}")
 954         if (w_hash != hashes['fitted_weights_hash']) or (m_hash != hashes['preds_hash']):
 955             if w_hash != hashes['fitted_weights_hash']:
 956                 warnings.warn("The fitted weights hash does not match the reproducibility weights hash.")
 957             if m_hash != hashes['preds_hash']:
 958                 warnings.warn("The predictions hash does not match the reproducibility predictions hash.")
 959         else:
 960             print("***Reproducibility Checks passed - model weights and model predictions match expected.***")
 961
 962     # Check Environment
 963     current_py_version = sys.version[0:6]
 964     current_tf_version = tf.__version__
 965     if current_py_version != repro_info['env_info']['py_version']:
 966         warnings.warn(f"Python version mismatch: Current Python version is {current_py_version}, "
 967                       f"expected {repro_info['env_info']['py_version']}.")
 968
 969     if current_tf_version != repro_info['env_info']['tf_version']:
 970         warnings.warn(f"TensorFlow version mismatch: Current TensorFlow version is {current_tf_version}, "
 971                       f"expected {repro_info['env_info']['tf_version']}.")
 972
 973     # Check Params
 974     repro_params = repro_info.get('params', {})
 975
 976     for key, repro_value in repro_params.items():
 977         if key in params:
 978             if params[key] != repro_value:
 979                 warnings.warn(f"Parameter mismatch for '{key}': Current value is {params[key]}, "
 980                               f"repro value is {repro_value}.")
 981         else:
 982             warnings.warn(f"Parameter '{key}' is missing in the current params.")
 983
 984     return
 985
 986 class RNNModel(ABC):
 987     """
 988     Abstract base class for RNN models, providing structure for training, predicting, and running reproducibility checks.
 989     """
 990     def __init__(self, params: dict):
 991         """
 992         Initializes the RNNModel with the given parameters.
 993
 994         Parameters:
 995         -----------
 996         params : dict
 997             A dictionary containing model parameters.
 998         """
 999         self.params = params
1000         if type(self) is RNNModel:
1001             raise TypeError("MLModel is an abstract class and cannot be instantiated directly")
1002         super().__init__()
1003
1004     @abstractmethod
1005     def _build_model_train(self):
1006         """Abstract method to build the training model."""
1007         pass
1008
1009     @abstractmethod
1010     def _build_model_predict(self, return_sequences=True):
1011         """Abstract method to build the prediction model. This model copies weights from the train model but with input structure that allows for easier prediction of arbitrary length timeseries. This model is not to be used for training, or don't use with .fit calls"""
1012         pass
1013
1014     def is_stateful(self):
1015         """
1016         Checks whether any of the layers in the internal model (self.model_train) are stateful.
1017
1018         Returns:
1019         bool: True if at least one layer in the model is stateful, False otherwise.
1020
1021         This method iterates over all the layers in the model and checks if any of them
1022         have the 'stateful' attribute set to True. This is useful for determining if
1023         the model is designed to maintain state across batches during training.
1024
1025         Example:
1026         --------
1027         model.is_stateful()
1028         """
1029         for layer in self.model_train.layers:
1030             if hasattr(layer, 'stateful') and layer.stateful:
1031                 return True
1032         return False
1033
1034     def fit(self, X_train, y_train, plot_history=True, plot_title = '',
1035             weights=None, callbacks=[], validation_data=None, return_epochs=False, *args, **kwargs):
1036         """
1037         Trains the model on the provided training data. Uses the fit method of the training model and then copies the weights over to the prediction model, which has a less restrictive input shape. Formats a list of callbacks to use within the fit method based on params input
1038
1039         Parameters:
1040         -----------
1041         X_train : np.ndarray
1042             The input matrix data for training.
1043         y_train : np.ndarray
1044             The target vector data for training.
1045         plot_history : bool, optional
1046             If True, plots the training history. Default is True.
1047         plot_title : str, optional
1048             The title for the training plot. Default is an empty string.
1049         weights : optional
1050             Initial weights for the model. Default is None.
1051         callbacks : list, optional
1052             A list of callback functions to use during training. Default is an empty list.
1053         validation_data : tuple, optional
1054             Validation data to use during training, expected format (X_val, y_val). Default is None.
1055         return_epochs : bool
1056             If True, return the number of epochs that training took. Used to test and optimize early stopping
1057         """
1058         # verbose_fit argument is for printing out update after each epoch, which gets very long
1059         verbose_fit = self.params['verbose_fit']
1060         verbose_weights = self.params['verbose_weights']
1061         if verbose_weights:
1062             print(f"Training simple RNN with params: {self.params}")
1063
1064         # Setup callbacks
1065         if self.params["reset_states"]:
1066             callbacks=callbacks+[ResetStatesCallback(self.params), TerminateOnNaN()]
1067
1068         # Early stopping callback requires validation data
1069         if validation_data is not None:
1070             X_val, y_val =validation_data[0], validation_data[1]
1071             print("Using early stopping callback.")
1072             early_stop = EarlyStoppingCallback(patience = self.params['early_stopping_patience'])
1073             callbacks=callbacks+[early_stop]
1074         if verbose_weights:
1075             print(f"Formatted X_train hash: {hash_ndarray(X_train)}")
1076             print(f"Formatted y_train hash: {hash_ndarray(y_train)}")
1077             if validation_data is not None:
1078                 print(f"Formatted X_val hash: {hash_ndarray(X_val)}")
1079                 print(f"Formatted y_val hash: {hash_ndarray(y_val)}")
1080             print(f"Initial weights before training hash: {hash_weights(self.model_train)}")
1081
1082         ## TODO: Hidden State Initialization
1083         # Evaluate Model once to set nonzero initial state
1084         # self.model_train(X_train[0:self.params['batch_size'],:,:])
1085
1086         if validation_data is not None:
1087             history = self.model_train.fit(
1088                 X_train, y_train,
1089                 epochs=self.params['epochs'],
1090                 batch_size=self.params['batch_size'],
1091                 callbacks = callbacks,
1092                 verbose=verbose_fit,
1093                 validation_data = (X_val, y_val),
1094                 *args, **kwargs
1095             )
1096         else:
1097             history = self.model_train.fit(
1098                 X_train, y_train,
1099                 epochs=self.params['epochs'],
1100                 batch_size=self.params['batch_size'],
1101                 callbacks = callbacks,
1102                 verbose=verbose_fit,
1103                 *args, **kwargs
1104             )
1105
1106         if plot_history:
1107             self.plot_history(history,plot_title)
1108
1109         if self.params["verbose_weights"]:
1110             print(f"Fitted Weights Hash: {hash_weights(self.model_train)}")
1111
1112         # Update Weights for Prediction Model
1113         w_fitted = self.model_train.get_weights()
1114         self.model_predict.set_weights(w_fitted)
1115
1116         if return_epochs:
1117             # Epoch counting starts at 0, adding 1 for the count
1118             return early_stop.best_epoch + 1
1119
1120     def predict(self, X_test):
1121         """
1122         Generates predictions on the provided test data using the internal prediction model.
1123
1124         Parameters:
1125         -----------
1126         X_test : np.ndarray
1127             The input data for generating predictions.
1128
1129         Returns:
1130         --------
1131         np.ndarray
1132             The predicted values.
1133         """
1134         print("Predicting test data")
1135         X_test = self._format_pred_data(X_test)
1136         preds = self.model_predict.predict(X_test).flatten()
1137         return preds
1138
1139
1140     def _format_pred_data(self, X):
1141         """
1142         Formats the prediction data for RNN input.
1143
1144         Parameters:
1145         -----------
1146         X : np.ndarray
1147             The input data.
1148
1149         Returns:
1150         --------
1151         np.ndarray
1152             The formatted input data.
1153         """
1154         return np.reshape(X,(1, X.shape[0], self.params['n_features']))
1155
1156     def plot_history(self, history, plot_title, create_figure=True):
1157         """
1158         Plots the training history. Uses log scale on y axis for readability.
1159
1160         Parameters:
1161         -----------
1162         history : History object
1163             The training history object from model fitting. Output of keras' .fit command
1164         plot_title : str
1165             The title for the plot.
1166         """
1167
1168         if create_figure:
1169             plt.figure(figsize=(10, 6))
1170         plt.semilogy(history.history['loss'], label='Training loss')
1171         if 'val_loss' in history.history:
1172             plt.semilogy(history.history['val_loss'], label='Validation loss')
1173         plt.title(f'{plot_title} Model loss')
1174         plt.ylabel('Loss')
1175         plt.xlabel('Epoch')
1176         plt.legend(loc='upper left')
1177         plt.show()
1178
1179     def run_model(self, dict0, reproducibility_run=False, plot_period='all', save_outputs=True, return_epochs=False):
1180         """
1181         Runs the RNN model on input data dictionary, including training, prediction, and reproducibility checks.
1182
1183         Parameters:
1184         -----------
1185         dict0 : RNNData (dict)
1186             The dictionary containing the input data and configuration.
1187         reproducibility_run : bool, optional
1188             If True, performs reproducibility checks after running the model. Default is False.
1189         save_outputs : bool
1190             If True, writes model outputs into input dictionary.
1191         return_epochs : bool
1192             If True, returns how many epochs of training happened. Used to optimize params related to early stopping
1193
1194         Returns:
1195         --------
1196         tuple
1197             Model predictions and a dictionary of RMSE errors broken up by time period.
1198         """
1199         verbose_fit = self.params['verbose_fit']
1200         verbose_weights = self.params['verbose_weights']
1201         if verbose_weights:
1202             dict0.print_hashes()
1203         # Extract Datasets
1204         X_train, y_train, X_test, y_test = dict0.X_train, dict0.y_train, dict0.X_test, dict0.y_test
1205         if 'X_val' in dict0:
1206             X_val, y_val = dict0.X_val, dict0.y_val
1207         else:
1208             X_val = None
1209         if dict0.spatial:
1210             case_id = "Spatial Training Set"
1211         else:
1212             case_id = dict0.case
1213
1214         # Fit model
1215         if X_val is None:
1216             eps = self.fit(X_train, y_train, plot_title=case_id, return_epochs=return_epochs)
1217         else:
1218             eps = self.fit(X_train, y_train, validation_data = (X_val, y_val), plot_title=case_id, return_epochs=return_epochs)
1219
1220         # Generate Predictions and Evaluate Test Error
1221         if dict0.spatial:
1222             m, errs = self._eval_multi(dict0)
1223             if save_outputs:
1224                 dict0['m']=m
1225         else:
1226             m, errs = self._eval_single(dict0, verbose_weights, reproducibility_run)
1227             if save_outputs:
1228                 dict0['m']=m
1229             plot_data(dict0, title="RNN", title2=dict0.case, plot_period=plot_period)
1230
1231         if return_epochs:
1232             return m, errs, eps
1233         else:
1234             return m, errs
1235
1236     def _eval_single(self, dict0, verbose_weights, reproducibility_run):
1237         # Generate Predictions,
1238         # run through training to get hidden state set properly for forecast period
1239         print(f"Running prediction on all input data, Training through Test")
1240         X = dict0.scale_all_X()
1241         y = dict0.y.flatten()
1242         # Predict
1243         if verbose_weights:
1244             print(f"All X hash: {hash_ndarray(X)}")
1245
1246         m = self.predict(X).flatten()
1247         if verbose_weights:
1248             print(f"Predictions Hash: {hash_ndarray(m)}")
1249
1250         if reproducibility_run:
1251             print("Checking Reproducibility")
1252             check_reproducibility(dict0, self.params, hash_ndarray(m), hash_weights(self.model_predict))
1253
1254         # print(dict0.keys())
1255         # Plot final fit and data
1256         # dict0['y'] = y
1257         # plot_data(dict0, title="RNN", title2=dict0['case'], plot_period=plot_period)
1258
1259         # Calculate Errors
1260         err = rmse(m, y)
1261         train_ind = dict0.train_ind # index of final training set value
1262         test_ind = dict0.test_ind # index of first test set value
1263
1264         err_train = rmse(m[:train_ind], y[:train_ind].flatten())
1265         err_pred = rmse(m[test_ind:], y[test_ind:].flatten())
1266         rmse_dict = {
1267             'all': err,
1268             'training': err_train,
1269             'prediction': err_pred
1270         }
1271         return m, rmse_dict
1272
1273     def _eval_multi(self, dict0):
1274         # Train Error: NOT DOING YET. DECIDE WHETHER THIS IS NEEDED
1275
1276         # Test Error
1277         new_data = np.stack(dict0.X_test, axis=0)
1278         y_array = np.stack(dict0.y_test, axis=0)
1279         preds = self.model_predict.predict(new_data)
1280
1281         # Calculate RMSE
1282         ## Note: not using util rmse function since this approach is for 3d arrays
1283         # Compute the squared differences
1284         squared_diff = np.square(preds - y_array)
1285
1286         # Mean squared error along the timesteps and dimensions (axis 1 and 2)
1287         mse = np.mean(squared_diff, axis=(1, 2))
1288
1289         # Root mean squared error (RMSE) for each timeseries
1290         rmses = np.sqrt(mse)
1291
1292         return preds, rmses
1293
1294
1295 ## Callbacks
1296
1297 # Helper functions for batch reset schedules
def calc_exp_intervals(bmin, bmax, n_epochs, force_bmax = True):
    """
    Build a per-epoch schedule that grows exponentially from bmin towards bmax.

    Epoch e (0-based) gets bmin * (bmax / bmin) ** (e / n_epochs), truncated to
    an integer. With force_bmax the last entry is set to exactly bmax.

    Returns an integer array of length n_epochs.
    """
    growth = bmax / bmin
    progress = np.arange(n_epochs) / n_epochs
    schedule = bmin * np.power(growth, progress)
    if force_bmax:
        # The formula alone never reaches bmax, so pin the final epoch to it
        schedule[-1] = bmax
    return schedule.astype(int)
1306
def calc_log_intervals(bmin, bmax, n_epochs, force_bmax = True):
    """
    Build a per-epoch schedule that grows logarithmically from bmin towards bmax.

    Epoch e (0-based) gets bmin + (bmax - bmin) * log(1 + e) / log(1 + n_epochs),
    truncated to an integer. With force_bmax the last entry is set to exactly bmax.

    Returns an integer array of length n_epochs.
    """
    span = bmax - bmin
    progress = np.log(1 + np.arange(n_epochs)) / np.log(1 + n_epochs)
    schedule = bmin + span * progress
    if force_bmax:
        # The formula alone never reaches bmax, so pin the final epoch to it
        schedule[-1] = bmax
    return schedule.astype(int)
1315
1316 class ResetStatesCallback(Callback):
1317     """
1318     Custom callback to reset the states of RNN layers at the end of each epoch and optionally after a specified number of batches.
1319
1320     Parameters:
1321     -----------
1322     batch_reset : int, optional
1323         If provided, resets the states of RNN layers after every `batch_reset` batches. Default is None.
1324     """
1325     # def __init__(self, bmin=None, bmax=None, epochs=None, loc_batch_reset = None, batch_schedule_type='linear', verbose=True):
1326     def __init__(self, params=None, verbose=True):
1327         """
1328         Initializes the ResetStatesCallback with an optional batch reset interval.
1329
1330         Parameters:
1331         -----------
1332         params: dict, optional
1333             Dictionary of parameters. If None provided, only on_epoch_end will trigger reset of hidden states.
1334             - bmin : int
1335                 Minimum for batch reset schedule
1336             - bmax : int
1337                 Maximum for batch reset schedule
1338             - epochs : int
1339                 Number of training epochs.
1340             - loc_batch_reset : int
1341                 Interval of batches after which to reset the states of RNN layers for location changes. Triggers reset for training AND validation phases
1342             - batch_schedule_type : str
1343                 Type of batch scheduling to be used. Recognized methods are following:
1344                 - 'constant' : Used fixed batch reset interval throughout training
1345                 - 'linear'   : Increases the batch reset interval linearly over epochs from bmin to bmax.
1346                 - 'exp'      : Increases the batch reset interval exponentially over epochs from bmin to bmax.
1347                 - 'log'      : Increases the batch reset interval logarithmically over epochs from bmin to bmax.
1348
1349
1350         Returns:
1351         -----------
1352         Only in-place reset of hidden states of RNN that calls uses this callback.
1353
1354         """
1355         super(ResetStatesCallback, self).__init__()
1356
1357         # Check for optional arguments, set None if missing in input params
1358         arg_list = ['bmin', 'bmax', 'epochs', 'loc_batch_reset', 'batch_schedule_type']
1359         for arg in arg_list:
1360             setattr(self, arg, params.get(arg, None))
1361
1362         self.verbose = verbose
1363         if self.verbose:
1364             print(f"Using ResetStatesCallback with Batch Reset Schedule: {self.batch_schedule_type}")
1365         # Calculate the reset intervals for each epoch during initialization
1366         if self.batch_schedule_type is not None:
1367             if self.epochs is None:
1368                 raise ValueError(f"Arugment `epochs` cannot be none with self.batch_schedule_type: {self.batch_schedule_type}")
1369             self.batch_reset_intervals = self._calc_reset_intervals(self.batch_schedule_type)
1370             if self.verbose:
1371                 print(f"batch_reset_intervals: {self.batch_reset_intervals}")
1372         else:
1373             self.batch_reset_intervals = None
1374     def on_epoch_end(self, epoch, logs=None):
1375         """
1376         Resets the states of RNN layers at the end of each epoch.
1377
1378         Parameters:
1379         -----------
1380         epoch : int
1381             The index of the current epoch.
1382         logs : dict, optional
1383             A dictionary containing metrics from the epoch. Default is None.
1384         """
1385         # print(f" Resetting hidden state after epoch: {epoch+1}", flush=True)
1386         # Iterate over each layer in the model
1387         for layer in self.model.layers:
1388             # Check if the layer has a reset_states method
1389             if hasattr(layer, 'reset_states'):
1390                 layer.reset_states()
1391     def _calc_reset_intervals(self,batch_schedule_type):
1392         methods = ['constant', 'linear', 'exp', 'log']
1393         if batch_schedule_type not in methods:
1394             raise ValueError(f"Batch schedule method {batch_schedule_type} not recognized. \n Available methods: {methods}")
1395         if batch_schedule_type == "constant":
1396
1397             return np.repeat(self.bmin, self.epochs).astype(int)
1398         elif batch_schedule_type == "linear":
1399             return np.linspace(self.bmin, self.bmax, self.epochs).astype(int)
1400         elif batch_schedule_type == "exp":
1401             return calc_exp_intervals(self.bmin, self.bmax, self.epochs)
1402         elif batch_schedule_type == "log":
1403             return calc_log_intervals(self.bmin, self.bmax, self.epochs)
1404     def on_epoch_begin(self, epoch, logs=None):
1405         # Set the reset interval for the current epoch
1406         if self.batch_reset_intervals is not None:
1407             self.current_batch_reset = self.batch_reset_intervals[epoch]
1408         else:
1409             self.current_batch_reset = None
1410     def on_train_batch_end(self, batch, logs=None):
1411         """
1412         Resets the states of RNN layers during training after a specified number of batches, if `batch_reset` or `loc_batch_reset` are provided. The `batch_reset` is used for stability and to avoid exploding gradients at the beginning of training when a hidden state is being passed with weights that haven't learned yet. The `loc_batch_reset` is used to reset the states when a particular batch is from a new location and thus the hidden state should be passed.
1413
1414         Parameters:
1415         -----------
1416         batch : int
1417             The index of the current batch.
1418         logs : dict, optional
1419             A dictionary containing metrics from the batch. Default is None.
1420         """
1421         batch_reset = self.current_batch_reset
1422         if (batch_reset is not None and batch % batch_reset == 0):
1423             # print(f" Resetting states after batch {batch + 1}")
1424             # Iterate over each layer in the model
1425             for layer in self.model.layers:
1426                 # Check if the layer has a reset_states method
1427                 if hasattr(layer, 'reset_states'):
1428                     layer.reset_states()
1429     def on_test_batch_end(self, batch, logs=None):
1430         """
1431         Resets the states of RNN layers during validation if `loc_batch_reset` is provided to demarcate a new location and thus avoid passing a hidden state to a wrong location.
1432
1433         Parameters:
1434         -----------
1435         batch : int
1436             The index of the current batch.
1437         logs : dict, optional
1438             A dictionary containing metrics from the batch. Default is None.
1439         """
1440         loc_batch_reset = self.loc_batch_reset
1441         if (loc_batch_reset is not None and batch % loc_batch_reset == 0):
1442             # print(f"Resetting states in Validation mode after batch {batch + 1}")
1443             # Iterate over each layer in the model
1444             for layer in self.model.layers:
1445                 # Check if the layer has a reset_states method
1446                 if hasattr(layer, 'reset_states'):
1447                     layer.reset_states()
1448
## Learning Schedules
## NOT TESTED YET
# Cosine-decay learning-rate schedule, constructed once when the module is imported.
# In the model builders visible in this file it is only referenced from a
# commented-out optimizer line, so it does not currently affect training.
lr_schedule = tf.keras.optimizers.schedules.CosineDecay(
    0.01,                  # initial_learning_rate
    200,                   # decay_steps
    alpha=0.0,
    name='CosineDecay',
    # warmup_target=None,
    # warmup_steps=100
)
##
1460
def EarlyStoppingCallback(patience=5):
    """
    Build a Keras EarlyStopping callback that watches the validation loss.

    The callback monitors 'val_loss' in 'min' mode, reports verbosely (verbose=1),
    and restores the best weights when it stops training.

    Args:
        patience (int): Number of epochs with no improvement after which training will be stopped.

    Returns:
        EarlyStopping: Configured EarlyStopping callback.
    """
    stopper_options = dict(
        monitor='val_loss',
        mode='min',
        patience=patience,
        restore_best_weights=True,
        verbose=1,
    )
    return EarlyStopping(**stopper_options)
1478
# Physical constants read by get_initial_weights to build physics-based initial weights.
phys_params = dict(
    DeltaE=[0, -1],           # bias correction: [hidden layer, output layer]
    T1=0.1,                   # 1/fuel class (10)
    fm_raise_vs_rain=0.2,     # fm increase per mm rain
)
1484
1485
1486
def get_initial_weights(model_fit,params,scale_fm=1):
    """
    Given an RNN architecture and hyperparameter dictionary, return physics-initiated weights.

    The weights are read from `model_fit`, overwritten in place with constants derived from
    the module-level `phys_params`, and returned; `model_fit` itself is not updated (the
    caller is expected to pass the result to `set_weights`).

    Parameters:
    -----------
    model_fit : model object exposing `get_weights()`
        An untrained model. Its weight list is expected to be ordered as
        ['wx','wh','bh','wd','bd']: input kernel, recurrent kernel, recurrent bias,
        output dense kernel, output dense bias.
    params : dict
        Dictionary of hyperparameters. Keys read here: 'centering', 'verbose_weights',
        'features_list' (entries must be among 'Ed', 'Ew', 'rain').
    scale_fm : numeric, optional
        Scaling applied to fuel moisture; multiplies the rain weight and divides the bias
        corrections. Default is 1.

    Returns:
    -----------
    (w, w_name) : tuple
        `w` is the list of numpy weight arrays that should be a rough solution to the
        moisture ODE, `w_name` the list of names of those weights.

    Raises:
    -----------
    ValueError
        If the model has more weight arrays than the five this initialization knows about,
        or a weight array is not 1- or 2-dimensional.
    KeyError
        If a feature in params['features_list'] has no physics-based initial value.
    """
    DeltaE = phys_params['DeltaE']
    T1 = phys_params['T1']
    fmr = phys_params['fm_raise_vs_rain']
    centering = params['centering']  # shift activation down

    # Single source for the decay factor: the recurrent weight and the input weights
    # must stay consistent if phys_params['T1'] is ever changed.
    decay = np.exp(-T1)

    w0_initial={'Ed':(1.-decay)/2,
                'Ew':(1.-decay)/2,
                'rain':fmr * scale_fm}   # wx - input feature
                                 #  wh      wb   wd    bd = bias -1

    # Entry 0 is a placeholder: the input kernel is filled per feature from w0_initial.
    w_initial=np.array([np.nan, decay, DeltaE[0]/scale_fm, # layer 0
                        1.0, -centering[0] + DeltaE[1]/scale_fm])                 # layer 1
    if params['verbose_weights']:
        print('Equilibrium moisture correction bias',DeltaE[0],
              'in the hidden layer and',DeltaE[1],' in the output layer')

    w_name = ['wx','wh','bh','wd','bd']

    w=model_fit.get_weights()
    if len(w) > len(w_name):
        # Fail before touching any weights instead of hitting an IndexError halfway through.
        raise ValueError(
            f"Physics initialization supports at most {len(w_name)} weight arrays {w_name}, "
            f"but the model has {len(w)}")

    # Input kernel: each row belongs to one feature and gets that feature's constant.
    for j in range(w[0].shape[0]):
        feature = params['features_list'][j]
        w[0][j, :] = w0_initial[feature]

    for i in range(1,len(w)):            # number of the weight
        if w[i].ndim==2:
            # initialize all entries of the weight matrix to the same number,
            # split evenly over the inputs (rows)
            w[i][:, :] = w_initial[i]/w[i].shape[0]
        elif w[i].ndim==1:
            w[i][:] = w_initial[i]
        else:
            print('weight',i,'shape',w[i].shape)
            raise ValueError("Only 1 or 2 dimensions supported")
        if params['verbose_weights']:
            print('weight',i,w_name[i],'shape',w[i].shape,'ndim',w[i].ndim,
                  'initial: sum',np.sum(w[i],axis=0),'\nentries',w[i])

    return w, w_name
1533
class RNN(RNNModel):
    """
    A concrete implementation of the RNNModel abstract base class, using simple recurrent cells for hidden recurrent layers.

    Parameters:
    -----------
    params : dict
        A dictionary of model parameters.
    loss : str, optional
        The loss function to use during model training. Default is 'mean_squared_error'.
    """
    def __init__(self, params, loss='mean_squared_error'):
        """
        Initializes the RNN model by building the training and prediction models.

        Parameters:
        -----------
        params : dict or RNNParams
            A dictionary containing the model's parameters.
        loss : str, optional
            The loss function to use during model training. Default is 'mean_squared_error'.
            Stored as `self.loss` and passed to `compile` for both models.
        """
        super().__init__(params)
        # Must be set before the builders run: they compile with this loss.
        # (The argument used to be accepted but silently ignored.)
        self.loss = loss
        self.model_train = self._build_model_train()
        self.model_predict = self._build_model_predict()

    def _build_model_train(self):
        """
        Builds and compiles the training model, with batch & sequence shape specifications for input.

        Architecture: `rnn_layers` SimpleRNN layers (only the last one returns a single output
        instead of the full sequence), an optional Dropout, `dense_layers` Dense layers, and a
        final single-unit linear Dense output. If params['phys_initialize'] is set, the freshly
        built weights are replaced by the physics-based ones from `get_initial_weights`.

        Returns:
        --------
        model : tf.keras.Model
            The compiled Keras model for training.

        Raises:
        --------
        AssertionError
            If physics initialization is requested with a scaler other than 'reproducibility'
            or a features list other than ['Ed', 'Ew', 'rain'].
        """
        n_rnn = self.params['rnn_layers']
        inputs = tf.keras.Input(batch_shape=self.params['batch_shape'])
        x = inputs
        for i in range(n_rnn):
            # Return sequences True if recurrent layer feeds into another recurrent layer.
            # False if feeds into dense layer
            return_sequences = i < n_rnn - 1
            x = SimpleRNN(
                units=self.params['rnn_units'],
                activation=self.params['activation'][0],
                dropout=self.params["dropout"][0],
                recurrent_dropout = self.params["recurrent_dropout"],
                stateful=self.params['stateful'],
                return_sequences=return_sequences)(x)
        if self.params["dropout"][1] > 0:
            x = Dropout(self.params["dropout"][1])(x)
        for _ in range(self.params['dense_layers']):
            x = Dense(self.params['dense_units'], activation=self.params['activation'][1])(x)
        # Add final output layer, must be 1 dense cell with linear activation if continuous scalar output
        x = Dense(units=1, activation='linear')(x)
        model = tf.keras.Model(inputs=inputs, outputs=x)
        optimizer=tf.keras.optimizers.Adam(learning_rate=self.params['learning_rate'])
        # optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
        model.compile(loss=self.loss, optimizer=optimizer)

        if self.params["verbose_weights"]:
            print(f"Initial Weights Hash: {hash_weights(model)}")
            # print(model.get_weights())

        if self.params['phys_initialize']:
            assert self.params['scaler'] == 'reproducibility', f"Not implemented yet to do physics initialize with given data scaling {self.params['scaler']}"
            assert self.params['features_list'] == ['Ed', 'Ew', 'rain'], f"Physics initiation can only be done with features ['Ed', 'Ew', 'rain'], but given features {self.params['features_list']}"
            print("Initializing Model with Physics based weights")
            # The weight names returned alongside are not needed here
            w, _ = get_initial_weights(model, self.params)
            model.set_weights(w)
            print('initial weights hash =',hash_weights(model))
        return model

    def _build_model_predict(self, return_sequences=True):
        """
        Builds and compiles the prediction model, doesn't use batch shape nor sequence length to make it easier to predict arbitrary number of timesteps. This model has weights copied over from the training model and is not directly used for training itself.

        The layer stack mirrors the training model without dropout and with stateless
        recurrent layers, so the weight list of `self.model_train` fits it one-to-one.

        Parameters:
        -----------
        return_sequences : bool, optional
            Whether to return the full sequence of outputs. Applied to every recurrent layer. Default is True.

        Returns:
        --------
        model : tf.keras.Model
            The compiled Keras model for prediction.
        """
        inputs = tf.keras.Input(shape=(None,self.params['n_features']))
        x = inputs
        for _ in range(self.params['rnn_layers']):
            x = SimpleRNN(self.params['rnn_units'],activation=self.params['activation'][0],
                  stateful=False,return_sequences=return_sequences)(x)
        for _ in range(self.params['dense_layers']):
            x = Dense(self.params['dense_units'], activation=self.params['activation'][1])(x)
        # Add final output layer, must be 1 dense cell with linear activation if continuous scalar output
        x = Dense(units=1, activation='linear')(x)
        model = tf.keras.Model(inputs=inputs, outputs=x)
        optimizer=tf.keras.optimizers.Adam(learning_rate=self.params['learning_rate'])
        model.compile(loss=self.loss, optimizer=optimizer)

        # Set Weights to model_train
        w_fitted = self.model_train.get_weights()
        model.set_weights(w_fitted)

        return model
1638
1639
class RNN_LSTM(RNNModel):
    """
    A concrete implementation of the RNNModel abstract base class, use LSTM cells for hidden recurrent layers.

    Parameters:
    -----------
    params : dict
        A dictionary of model parameters.
    loss : str, optional
        The loss function to use during model training. Default is 'mean_squared_error'.
    """
    def __init__(self, params, loss='mean_squared_error'):
        """
        Initializes the LSTM model by building the training and prediction models.

        Parameters:
        -----------
        params : dict or RNNParams
            A dictionary containing the model's parameters.
        loss : str, optional
            The loss function to use during model training. Default is 'mean_squared_error'.
            Stored as `self.loss` and passed to `compile` for both models.
        """
        super().__init__(params)
        # Must be set before the builders run: they compile with this loss.
        # (The argument used to be accepted but silently ignored.)
        self.loss = loss
        self.model_train = self._build_model_train()
        self.model_predict = self._build_model_predict()

    def _build_model_train(self):
        """
        Builds and compiles the training model, with batch & sequence shape specifications for input.

        Architecture: `rnn_layers` LSTM layers (only the last one returns a single output
        instead of the full sequence), an optional Dropout, then `dense_layers` Dense layers.

        Returns:
        --------
        model : tf.keras.Model
            The compiled Keras model for training.
        """
        n_rnn = self.params['rnn_layers']
        inputs = tf.keras.Input(batch_shape=self.params['batch_shape'])
        x = inputs
        for i in range(n_rnn):
            # Full sequences are only needed when another recurrent layer follows
            return_sequences = i < n_rnn - 1
            x = LSTM(
                units=self.params['rnn_units'],
                activation=self.params['activation'][0],
                dropout=self.params["dropout"][0],
                recurrent_dropout = self.params["recurrent_dropout"],
                recurrent_activation=self.params["recurrent_activation"],
                stateful=self.params['stateful'],
                return_sequences=return_sequences)(x)
        if self.params["dropout"][1] > 0:
            x = Dropout(self.params["dropout"][1])(x)
        for _ in range(self.params['dense_layers']):
            x = Dense(self.params['dense_units'], activation=self.params['activation'][1])(x)
        # NOTE(review): unlike the RNN class, no final Dense(units=1, activation='linear')
        # output layer is added here, so the output width is whatever the last layer above
        # produces. Confirm whether that is intended before relying on a scalar output.
        model = tf.keras.Model(inputs=inputs, outputs=x)
        # optimizer=tf.keras.optimizers.Adam(learning_rate=self.params['learning_rate'], clipvalue=self.params['clipvalue'])
        optimizer=tf.keras.optimizers.Adam(learning_rate=self.params['learning_rate'])
        model.compile(loss=self.loss, optimizer=optimizer)

        if self.params["verbose_weights"]:
            print(f"Initial Weights Hash: {hash_weights(model)}")
        return model

    def _build_model_predict(self, return_sequences=True):
        """
        Builds and compiles the prediction model, doesn't use batch shape nor sequence length to make it easier to predict arbitrary number of timesteps. This model has weights copied over from the training model and is not directly used for training itself.

        The layer stack mirrors the training model without dropout and with stateless
        recurrent layers, so the weight list of `self.model_train` fits it one-to-one.

        Parameters:
        -----------
        return_sequences : bool, optional
            Whether to return the full sequence of outputs. Applied to every recurrent layer. Default is True.

        Returns:
        --------
        model : tf.keras.Model
            The compiled Keras model for prediction.
        """
        inputs = tf.keras.Input(shape=(None,self.params['n_features']))
        x = inputs
        for _ in range(self.params['rnn_layers']):
            x = LSTM(
                units=self.params['rnn_units'],
                activation=self.params['activation'][0],
                # Must match the training model: the copied weights only reproduce the
                # trained behaviour when the gates use the same activation.
                recurrent_activation=self.params["recurrent_activation"],
                stateful=False,return_sequences=return_sequences)(x)
        for _ in range(self.params['dense_layers']):
            x = Dense(self.params['dense_units'], activation=self.params['activation'][1])(x)
        model = tf.keras.Model(inputs=inputs, outputs=x)
        optimizer=tf.keras.optimizers.Adam(learning_rate=self.params['learning_rate'])
        model.compile(loss=self.loss, optimizer=optimizer)

        # Set Weights to model_train
        w_fitted = self.model_train.get_weights()
        model.set_weights(w_fitted)

        return model
1731
1732
1733
1734