From ce1e513b09fbd68d166f77543a1b25791d7086a1 Mon Sep 17 00:00:00 2001
From: jh-206 <jhirschi206@gmail.com>
Date: Tue, 10 Sep 2024 10:06:08 -0600
Subject: [PATCH] Delete create_rnn_data2

create_rnn_data2 is deprecated; use the RNNData class instead.
---
 fmda/moisture_rnn.py | 147 ---------------------------------------------------
 1 file changed, 147 deletions(-)

diff --git a/fmda/moisture_rnn.py b/fmda/moisture_rnn.py
index 888fce6..9b20a8a 100644
--- a/fmda/moisture_rnn.py
+++ b/fmda/moisture_rnn.py
@@ -185,153 +185,6 @@ scalers = {
 }
 
 
-## DEPRECATED, use RNNData class instead
-def create_rnn_data2(dict1, params, atm_dict="HRRR", verbose=False, train_ind=None, test_ind=None):
-    # Given fmda data and hyperparameters, return formatted dictionary to be used in RNN
-    # Inputs:
-    # d: (dict) fmda dictionary
-    # params: (dict) hyperparameters
-    # atm_dict: (str) string specifying name of subdictionary for atmospheric vars
-    # train_frac: (float) fraction of data to use for training (starting from time 0)
-    # val_frac: (float) fraction of data to use for validation data (starting from end of train)
-    # Returns: (dict) formatted data used in RNN 
-    logging.info('create_rnn_data start')
-    # Copy Dictionary to avoid changing the input to this function
-    d=copy.deepcopy(dict1)
-    scale = params['scale']
-    scaler= params['scaler']
-    # Features list given by params dict to be used in training
-    features_list = params["features_list"]
-    # All available features list, corresponds to shape of X
-    features_all = d["features_list"]
-    # Indices to subset all features with based on params features
-    indices = []
-    for item in features_list:
-        if item in features_all:
-            indices.append(features_all.index(item))
-        else:
-            print(f"Warning: feature name '{item}' not found in list of all features from input data")
-        
-    # Extract desired features based on params, combine into matrix
-    # Extract response vector 
-    y = d['y']
-    y = np.reshape(y,(-1,1))
-    # Extract Features matrix, subset to desired features
-    X_raw = d['X'][:, indices].copy() # saw untransformed features matrix 
-    X = d['X']
-    X = X[:, indices]
-
-    # Check total observed hours
-    hours=d['hours']    
-    assert hours == y.shape[0] # Check that it matches response
-    
-    logging.info('create_rnn_data: total_hours=%s',hours)
-    logging.info('feature matrix X shape %s',np.shape(X))
-    logging.info('target  matrix Y shape %s',np.shape(y))
-    logging.info('features_list: %s',features_list)
-
-    logging.info('splitting train/val/test')
-    if train_ind is None:
-        train_ind = round(hours * params['train_frac']) # index of last training observation
-    test_ind= train_ind + round(hours * params['val_frac'])# index of first test observation, if no validation data it is equal to train_ind
-    logging.info('Final index of training data=%s',train_ind)
-    logging.info('First index of Test data=%s',test_ind)
-    # Training data from 0 to train_ind
-    X_train = X[:train_ind]
-    y_train = y[:train_ind].reshape(-1,1)
-    # Validation data from train_ind to test_ind
-    X_val = X[train_ind:test_ind]
-    y_val = y[train_ind:test_ind].reshape(-1,1)
-    # Test data from test_ind to end
-    X_test = X[test_ind:]
-    y_test = y[test_ind:].reshape(-1,1)
-
-    # Scale Data if required
-    # TODO:
-        # Remove need for "scale_fm" param
-        # Reset reproducibility with this scaling
-    if scale:
-        logging.info('Scaling feature data with scaler: %s',scaler)
-        # scale=1
-        if scaler=="reproducibility":
-            scale_fm = 17.076346687085564
-            scale_rain = 0.01
-        else:
-            scale_fm=1.0
-            scale_rain=1.0
-            # Fit scaler to training data
-            scalers[scaler].fit(X_train)
-            # Apply scaling to all data using in-place operations
-            X_train[:] = scalers[scaler].transform(X_train)
-            if X_val.shape[0] > 0:
-                X_val[:] = scalers[scaler].transform(X_val)
-            X_test[:] = scalers[scaler].transform(X_test)
-            
-            
-    else:
-        print("Not scaling data")
-        scale_fm=1.0
-        scale_rain=1.0
-        scaler=None
-    
-    logging.info('x_train shape=%s',X_train.shape)
-    logging.info('y_train shape=%s',y_train.shape)
-    if test_ind == train_ind:
-        logging.info('No validation data')
-    elif X_val.shape[0]!= 0:
-        logging.info('X_val shape=%s',X_val.shape)
-        logging.info('y_val shape=%s',y_val.shape)    
-    logging.info('X_test shape=%s',X_test.shape)
-    logging.info('y_test shape=%s',y_test.shape)
-    
-    # Set up return dictionary
-    rnn_dat={
-        'case':d['case'],
-        'hours':hours,
-        'features_list':features_list,
-        'n_features': len(features_list),
-        'scaler':scaler,
-        'train_ind':train_ind,
-        'test_ind':test_ind,
-        'X_raw': X_raw,
-        'X':X,
-        'y':y,
-        'X_train': X_train,
-        'y_train': y_train,
-        'X_test': X_test,
-        'y_test': y_test
-    }
-
-    if X_val.shape[0] > 0:
-            rnn_dat.update({
-                'X_val': X_val,
-                'y_val': y_val
-            })
-
-    # Update RNN params using data attributes
-    logging.info('Updating model params based on data')
-    timesteps = params['timesteps']
-    batch_size = params['batch_size']
-    logging.info('batch_size=%s',batch_size)
-    logging.info('timesteps=%s',timesteps)
-    features = len(features_list)
-    # params.update({
-    #         'n_features': features,
-    #         'batch_shape': (params["batch_size"],params["timesteps"],features),
-    #         'pred_input_shape': (None, features),
-    #         'scaler': scaler,
-    #         'scale_fm': scale_fm,
-    #         'scale_rain': scale_rain
-    #     })
-    rnn_dat.update({
-        'scaler': scaler, 
-        'scale_fm': scale_fm,
-        'scale_rain': scale_rain
-    })
-    
-    logging.info('create_rnn_data2 done')
-    return rnn_dat
-
 def batch_setup(ids, batch_size):
     """
     Sets up stateful batched training data scheme for RNN training.
-- 
2.11.4.GIT