# fmda/moisture_rnn.py
# Environment
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import sys
import logging
import copy
from abc import ABC, abstractmethod
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.layers import LSTM, SimpleRNN, Input, Dropout, Dense
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler, StandardScaler
# Local modules
import reproducibility
from utils import print_dict_summary, hash2
from data_funcs import load_and_fix_data, rmse, plot_data, compare_dicts
def staircase(x, y, timesteps, datapoints, return_sequences=False, verbose=False):
    # x [datapoints,features]  all inputs
    # y [datapoints,outputs]
    # timesteps: split x and y into samples of length timesteps, shifted by 1
    # datapoints: number of timesteps to use for training, no more than y.shape[0]
    print('staircase: shape x = ', x.shape)
    print('staircase: shape y = ', y.shape)
    print('staircase: timesteps=', timesteps)
    print('staircase: datapoints=', datapoints)
    print('staircase: return_sequences=', return_sequences)
    outputs = y.shape[1]
    features = x.shape[1]
    samples = datapoints - timesteps + 1
    print('staircase: samples=', samples, 'timesteps=', timesteps, 'features=', features)
    x_train = np.empty([samples, timesteps, features])
    if return_sequences:
        print('returning all timesteps in a sample')
        y_train = np.empty([samples, timesteps, outputs])  # all
        for i in range(samples):
            for k in range(timesteps):
                x_train[i, k, :] = x[i + k, :]
                y_train[i, k, :] = y[i + k, :]
    else:
        print('returning only the last timestep in a sample')
        y_train = np.empty([samples, outputs])
        for i in range(samples):
            for k in range(timesteps):
                x_train[i, k, :] = x[i + k, :]
            y_train[i, :] = y[i + timesteps - 1, :]

    return x_train, y_train
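
# Minimal usage sketch of staircase() on synthetic data, for illustration only:
# the array sizes below are arbitrary example values, not anything prescribed by the
# fmda workflow. It shows how the function reshapes [datapoints, features] into
# overlapping samples of shape [samples, timesteps, features].
def _demo_staircase():
    x = np.arange(20, dtype=float).reshape(10, 2)   # 10 datapoints, 2 features
    y = np.arange(10, dtype=float).reshape(10, 1)   # 10 datapoints, 1 output
    x_train, y_train = staircase(x, y, timesteps=3, datapoints=10)
    # With 10 datapoints and timesteps=3 there are 10-3+1 = 8 overlapping samples,
    # so x_train has shape (8, 3, 2) and y_train has shape (8, 1).
    print(x_train.shape, y_train.shape)
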
def staircase_2(x, y, timesteps, batch_size=None, trainsteps=np.inf, return_sequences=False, verbose=False):
    # create RNN training data in multiple batches
    # input:
    #     x (,features)
    #     y (,outputs)
    #     timesteps: split x and y into sequences of length timesteps
    #                a.k.a. lookback or sequence_length

    # print params if verbose

    if batch_size is None:
        raise ValueError('staircase_2 requires batch_size')
    print('staircase_2: shape x = ', x.shape)
    print('staircase_2: shape y = ', y.shape)
    print('staircase_2: timesteps=', timesteps)
    print('staircase_2: batch_size=', batch_size)
    print('staircase_2: return_sequences=', return_sequences)

    nx, features = x.shape
    ny, outputs = y.shape
    datapoints = min(nx, ny, trainsteps)
    print('staircase_2: datapoints=', datapoints)

    # sequence j in a given batch is assumed to be the continuation of sequence j in the previous batch
    # https://www.tensorflow.org/guide/keras/working_with_rnns  Cross-batch statefulness

    # example with timesteps=3 batch_size=3 datapoints=15
    #     batch 0: [0 1 2]      [1 2 3]      [2 3 4]
    #     batch 1: [3 4 5]      [4 5 6]      [5 6 7]
    #     batch 2: [6 7 8]      [7 8 9]      [8 9 10]
    #     batch 3: [9 10 11]    [10 11 12]   [11 12 13]
    #     batch 4: [12 13 14]   [13 14 15]   when the data runs out this is the last batch, and it can be shorter

    # TODO: implement for multiple locations, same starting time for each batch
    #              Loc 1        Loc 2        Loc 3
    #     batch 0: [0 1 2]      [0 1 2]      [0 1 2]
    #     batch 1: [3 4 5]      [3 4 5]      [3 4 5]
    #     batch 2: [6 7 8]      [6 7 8]      [6 7 8]
    # TODO: on the second epoch, shift the starting time at batch 0 in time

    # TODO: implement for multiple locations, different starting times for each batch
    #              Loc 1        Loc 2        Loc 3
    #     batch 0: [0 1 2]      [1 2 3]      [2 3 4]
    #     batch 1: [3 4 5]      [4 5 6]      [5 6 7]
    #     batch 2: [6 7 8]      [7 8 9]      [8 9 10]

    # the first sample in batch j starts from timesteps*j and ends with timesteps*(j+1)-1
    # e.g. the final hidden state of the rnn after the sequence of steps [0 1 2] in batch 0
    # becomes the starting hidden state of the rnn in the sequence of steps [3 4 5] in batch 1, etc.

    # sample [0 1 2] means the rnn is used twice to map state 0 -> 1 -> 2
    # the state at time 0 is fixed but the state is considered a variable at times 1 and 2
    # the loss is computed from the output at time 2, and the gradient of the loss function by the chain rule ends at time 0 because the state there is a constant -> derivative is zero
    # sample [3 4 5] means the rnn is used twice to map state 3 -> 4 -> 5
    # the state at time 3 is fixed to the output of the first sequence [0 1 2]
    # the loss is computed from the output at time 5, and the gradient of the loss function by the chain rule ends at time 3 because the state there is considered constant -> derivative is zero
    # how is the gradient computed? I suppose keras adds the gradients wrt the weights at 2 5 8 ..., 3 6 9 ..., 4 7 ... and uses that to update the weights
    # there is only one set of weights: h(2) = f(h(1),w), h(1) = f(h(0),w), but w is always the same
    # each column is one successive evaluation of h(n+1) = f(h(n),w) for n = n_start, n_start+1, ...
    # a single column cannot be evaluated efficiently on a gpu because a gpu is a parallel processor
    # think of it as each column being served by one thread; the threads are independent because they execute in parallel, and there needs to be a large number of threads (32 is a good number)
    # each batch consists of independent calculations
    # but it can depend on the result of the previous batch (that's the recurrent part)

    max_batches = datapoints // timesteps
    max_sequences = max_batches * batch_size

    print('staircase_2: max_batches=', max_batches)
    print('staircase_2: max_sequences=', max_sequences)

    x_train = np.zeros((max_sequences, timesteps, features))
    if return_sequences:
        y_train = np.empty((max_sequences, timesteps, outputs))
    else:
        y_train = np.empty((max_sequences, outputs))

    # build the sequences
    k = 0
    for i in range(max_batches):
        for j in range(batch_size):
            begin = i * timesteps + j
            next = begin + timesteps
            if next > datapoints:
                break
            if verbose:
                print('sequence', k, 'batch', i, 'sample', j, 'data', begin, 'to', next - 1)
            x_train[k, :, :] = x[begin:next, :]
            if return_sequences:
                y_train[k, :, :] = y[begin:next, :]
            else:
                y_train[k, :] = y[next - 1, :]
            k += 1

    print('staircase_2: shape x_train = ', x_train.shape)
    print('staircase_2: shape y_train = ', y_train.shape)
    print('staircase_2: sequences generated', k)
    print('staircase_2: batch_size=', batch_size)
    k = (k // batch_size) * batch_size
    print('staircase_2: removing partial and empty batches at the end, keeping', k)
    x_train = x_train[:k, :, :]
    if return_sequences:
        y_train = y_train[:k, :, :]
    else:
        y_train = y_train[:k, :]

    print('staircase_2: shape x_train = ', x_train.shape)
    print('staircase_2: shape y_train = ', y_train.shape)

    return x_train, y_train
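
# Minimal sketch, for illustration only, reproducing the batch layout described in the
# comments above with timesteps=3, batch_size=3, datapoints=15. Sequence j of batch i
# starts at index i*timesteps + j, so batch 0 holds [0 1 2], [1 2 3], [2 3 4],
# batch 1 holds [3 4 5], [4 5 6], [5 6 7], and so on; the trailing partial batch is dropped.
def _demo_staircase_2():
    t = np.arange(15, dtype=float)
    x = t.reshape(-1, 1)                 # single feature = the time index
    y = t.reshape(-1, 1)                 # single output, same values for readability
    x_train, y_train = staircase_2(x, y, timesteps=3, batch_size=3, verbose=True)
    # Each row of x_train is one sequence; printing the feature values shows the
    # [0 1 2], [1 2 3], [2 3 4], [3 4 5], ... pattern described above.
    print(x_train[:, :, 0])
    print(x_train.shape, y_train.shape)
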
# Dictionary of scalers, used to avoid multiple object creation and to avoid multiple if statements
scalers = {
    'minmax': MinMaxScaler(),
    'standard': StandardScaler()
}

# def scale_transform(X, method='minmax'):
#     # Function to scale data in place
#     # Inputs:
#     #     X: (ndarray) data to be scaled
#     #     method: (str) one of keys in scalers dictionary above
#     scaler = scalers[method]
#     scaler.fit(X)
#     # Modify X in-place
#     X[:] = scaler.transform(X)
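
# Minimal sketch, for illustration only, of how the shared `scalers` dictionary is used:
# fit on training rows only, then apply the same fitted transform to later data.
# The array values are arbitrary placeholders.
def _demo_scalers(method='minmax'):
    X_train = np.array([[0.0, 10.0], [1.0, 20.0], [2.0, 30.0]])
    X_test = np.array([[1.5, 25.0]])
    sc = scalers[method]
    sc.fit(X_train)                      # learn min/max (or mean/std) from training data only
    X_train_s = sc.transform(X_train)
    X_test_s = sc.transform(X_test)      # reuse the fitted statistics for test data
    return X_train_s, X_test_s
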
def create_rnn_data2(dict1, params, atm_dict="HRRR", verbose=False, train_ind=None, test_ind=None):
    # Given fmda data and hyperparameters, return formatted dictionary to be used in RNN
    # Inputs:
    #     dict1: (dict) fmda dictionary
    #     params: (dict) hyperparameters
    #     atm_dict: (str) string specifying name of subdictionary for atmospheric vars
    #     train_ind: (int) optional index of last training observation; computed from params['train_frac'] if None
    #     test_ind: (int) optional index of first test observation; computed from params['val_frac'] if None
    # Returns: (dict) formatted data used in RNN
    logging.info('create_rnn_data start')
    # Copy Dictionary
    d = copy.deepcopy(dict1)
    scale = params['scale']
    scaler = params['scaler']
    features_list = params["features_list"]

    # Extract desired features based on params, combine into matrix
    # Extract response vector
    fm = d['y']
    y = np.reshape(fm, [fm.shape[0], 1])
    # Extract Features matrix
    X = d['X']

    # Check total observed hours
    hours = d['hours']
    assert hours == y.shape[0]  # Check that it matches response

    logging.info('create_rnn_data: total_hours=%s', hours)
    logging.info('feature matrix X shape %s', np.shape(X))
    logging.info('target matrix Y shape %s', np.shape(y))
    logging.info('features_list: %s', features_list)

    logging.info('splitting train/val/test')
    if train_ind is None:
        train_ind = round(hours * params['train_frac'])  # index of last training observation
    if test_ind is None:
        test_ind = train_ind + round(hours * params['val_frac'])  # index of first test observation; if there is no validation data it equals train_ind
    logging.info('Final index of training data=%s', train_ind)
    logging.info('First index of Test data=%s', test_ind)
    # Training data from 0 to train_ind
    X_train = X[:train_ind]
    y_train = y[:train_ind].reshape(-1, 1)
    # Validation data from train_ind to test_ind
    X_val = X[train_ind:test_ind]
    y_val = y[train_ind:test_ind].reshape(-1, 1)
    # Test data from test_ind to end
    X_test = X[test_ind:]
    y_test = y[test_ind:].reshape(-1, 1)

    # Scale Data if required
    # TODO:
    #     Remove need for "scale_fm" param
    #     Reset reproducibility with this scaling
    if scale:
        logging.info('Scaling feature data with scaler: %s', scaler)
        # scale=1
        if scaler == "reproducibility":
            scale_fm = 17.076346687085564
        else:
            scale_fm = 1.0
        # Fit scaler to training data
        scalers[scaler].fit(X_train)
        # Apply scaling to all data using in-place operations
        X_train[:] = scalers[scaler].transform(X_train)
        if X_val.shape[0] > 0:
            X_val[:] = scalers[scaler].transform(X_val)
        X_test[:] = scalers[scaler].transform(X_test)
    else:
        print("Not scaling data")
        scale_fm = 1.0
        scaler = None

    logging.info('X_train shape=%s', X_train.shape)
    logging.info('y_train shape=%s', y_train.shape)
    if test_ind == train_ind:
        logging.info('No validation data')
    elif X_val.shape[0] != 0:
        logging.info('X_val shape=%s', X_val.shape)
        logging.info('y_val shape=%s', y_val.shape)
    logging.info('X_test shape=%s', X_test.shape)
    logging.info('y_test shape=%s', y_test.shape)

    # Set up return dictionary
    rnn_dat = {
        'case': d['case'],
        'hours': hours,
        'features_list': features_list,
        'features': len(features_list),
        'scaler': scaler,
        'train_ind': train_ind,
        'test_ind': test_ind,
        'X': X,
        'y': y,
        'X_train': X_train,
        'y_train': y_train,
        'X_test': X_test,
        'y_test': y_test
    }

    if X_val.shape[0] > 0:
        rnn_dat.update({
            'X_val': X_val,
            'y_val': y_val
        })

    # Update RNN params using data attributes
    logging.info('Updating model params based on data')
    timesteps = params['timesteps']
    batch_size = params['batch_size']
    logging.info('batch_size=%s', batch_size)
    logging.info('timesteps=%s', timesteps)
    features = len(features_list)
    params.update({
        'features': features,
        'batch_shape': (params["batch_size"], params["timesteps"], features),
        'pred_input_shape': (hours, features),
        'scaler': scaler,
        'scale_fm': scale_fm
    })
    rnn_dat.update({'scaler': scaler, 'scale_fm': scale_fm})

    logging.info('create_rnn_data2 done')
    return rnn_dat
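
# Minimal sketch, for illustration only, of the inputs create_rnn_data2 expects.
# The dict1 fields ('X', 'y', 'hours', 'case') and the params keys follow the usage in
# this file; the numeric values and feature names are made-up placeholders, not real
# fmda data.
def _demo_create_rnn_data2():
    hours = 100
    dict1 = {
        'case': 'demo_case',
        'hours': hours,
        'X': np.random.rand(hours, 2),
        'y': np.random.rand(hours),
    }
    params = {
        'scale': False,
        'scaler': None,
        'features_list': ['Ed', 'Ew'],
        'train_frac': 0.5,
        'val_frac': 0.1,
        'timesteps': 5,
        'batch_size': 32,
    }
    rnn_dat = create_rnn_data2(dict1, params)
    # rnn_dat now holds X_train/y_train, optional X_val/y_val, X_test/y_test and the
    # indices train_ind/test_ind; params gains 'batch_shape' and 'pred_input_shape'.
    return rnn_dat
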
# def create_rnn_data(dict1, params, atm_dict="HRRR", verbose=False, train_ind=None, test_ind=None, scaler=None):
#     # Given fmda data and hyperparameters, return formatted dictionary to be used in RNN
#     # Inputs:
#     #     d: (dict) fmda dictionary
#     #     params: (dict) hyperparameters
#     #     atm_dict: (str) string specifying name of subdictionary for atmospheric vars
#     #     train_frac: (float) fraction of data to use for training (starting from time 0)
#     #     val_frac: (float) fraction of data to use for validation data (starting from end of train)
#     # Returns: (dict) formatted data used in RNN
#     logging.info('create_rnn_data start')
#     # Copy Dictionary
#     d = copy.deepcopy(dict1)
#     scale = params['scale']
#     features_list = params["features_list"]
#
#     # Check if reproducibility case
#     if d['case'] == "reproducibility":
#         params.update({'scale': 1})
#         atm_dict = "RAWS"
#
#     # Scale Data if required
#     if scale:
#         scale = 1
#         if d['case'] == "reproducibility":
#             # Note: this was calculated from the max observed fm, Ed, Ew in a whole timeseries, originally using data from the test period
#             scale_fm = 17.076346687085564
#             logging.info("REPRODUCIBILITY scaling moisture features: using %s", scale_fm)
#             logging.info('create_rnn_data: scaling to range 0 to 1')
#             d[atm_dict]['Ed'] = d[atm_dict]['Ed'] / scale_fm
#             d[atm_dict]['Ew'] = d[atm_dict]['Ew'] / scale_fm
#             d[atm_dict]['fm'] = d[atm_dict]['fm'] / scale_fm
#             scaler = 'reproducibility'
#     else:
#         scale_fm = 1.0
#         scaler = None
#     # Extract desired features based on params, combine into matrix
#     fm = d[atm_dict]['fm']
#     values = [d[atm_dict][key] for key in features_list]
#     X = np.vstack(values).T
#     # Extract response vector
#     y = np.reshape(fm, [fm.shape[0], 1])
#     # Calculate total observed hours
#     hours = X.shape[0]
#     assert hours == y.shape[0]  # Check that it matches response
#
#     logging.info('create_rnn_data: total_hours=%s', hours)
#     logging.info('feature matrix X shape %s', np.shape(X))
#     logging.info('target matrix Y shape %s', np.shape(y))
#     logging.info('features_list: %s', features_list)
#
#     logging.info('splitting train/val/test')
#     if train_ind is None:
#         train_ind = round(hours * params['train_frac'])  # index of last training observation
#     test_ind = train_ind + round(hours * params['val_frac'])  # index of first test observation; if no validation data it is equal to train_ind
#     logging.info('Final index of training data=%s', train_ind)
#     logging.info('First index of Test data=%s', test_ind)
#     # Training data from 0 to train_ind
#     X_train = X[:train_ind]
#     y_train = y[:train_ind].reshape(-1, 1)
#     # Validation data from train_ind to test_ind
#     X_val = X[train_ind:test_ind]
#     y_val = y[train_ind:test_ind].reshape(-1, 1)
#     # Test data from test_ind to end
#     X_test = X[test_ind:]
#     y_test = y[test_ind:].reshape(-1, 1)
#
#     logging.info('X_train shape=%s', X_train.shape)
#     logging.info('y_train shape=%s', y_train.shape)
#     if test_ind == train_ind:
#         logging.info('No validation data')
#     elif X_val.shape[0] != 0:
#         logging.info('X_val shape=%s', X_val.shape)
#         logging.info('y_val shape=%s', y_val.shape)
#     logging.info('X_test shape=%s', X_test.shape)
#     logging.info('y_test shape=%s', y_test.shape)
#
#     # Set up return dictionary
#     rnn_dat = {
#         'case': d['case'],
#         'hours': hours,
#         'features_list': features_list,
#         'features': len(features_list),
#         'scaler': scaler,
#         'train_ind': train_ind,
#         'test_ind': test_ind,
#         'X': X,
#         'y': y,
#         'X_train': X_train,
#         'y_train': y_train,
#         'X_test': X_test,
#         'y_test': y_test
#     }
#     if rnn_dat['scaler'] == "reproducibility":
#         rnn_dat['scale_fm'] = 17.076346687085564
#     if X_val.shape[0] > 0:
#         rnn_dat.update({
#             'X_val': X_val,
#             'y_val': y_val
#         })
#
#     # Update RNN params using data attributes
#     logging.info('Updating model params based on data')
#     timesteps = params['timesteps']
#     batch_size = params['batch_size']
#     logging.info('batch_size=%s', batch_size)
#     logging.info('timesteps=%s', timesteps)
#     features = len(features_list)
#     params.update({
#         'features': features,
#         'batch_shape': (params["batch_size"], params["timesteps"], features),
#         'pred_input_shape': (hours, features),
#         'scaler': scaler
#     })
#     if params['scaler'] == "reproducibility":
#         params['scale_fm'] = 17.076346687085564
#
#     logging.info('create_rnn_data done')
#     return rnn_dat
repro_hashes = {
    'phys_initialize': {
        'fitted_weight_hash': 4.2030588308041834e+19,
        'predictions_hash': 3.59976005554199219
    },
    'rand_initialize': {
        'fitted_weight_hash': 4.4965532557938975e+19,
        'predictions_hash': 3.71594738960266113
    },
    'params': {
        'id': 0,
        'purpose': 'reproducibility',
        'batch_size': 32,
        'training': 5,
        'cases': ['case11'],
        'scale': 1,                  # every feature in [0, scale]
        'rain_do': True,
        'verbose': False,
        'timesteps': 5,
        'activation': ['linear', 'linear'],
        'hidden_units': 20,
        'dense_units': 1,            # do not change
        'dense_layers': 1,           # do not change
        'centering': [0.0, 0.0],     # should be activation at 0
        'DeltaE': [0, -1],           # bias correction
        'synthetic': False,          # run also synthetic cases
        'T1': 0.1,                   # 1/fuel class (10)
        'fm_raise_vs_rain': 0.2,     # fm increase per mm rain
        'train_frac': 0.5,           # time fraction to spend on training
        'epochs': 200,
        'verbose_fit': 0,
        'verbose_weights': False,
        'initialize': True,
        'learning_rate': 0.001       # default learning rate
    }
}
class RNNModel(ABC):
    def __init__(self, params: dict):
        self.params = params
        if type(self) is RNNModel:
            raise TypeError("RNNModel is an abstract class and cannot be instantiated directly")
        super().__init__()

    @abstractmethod
    def _build_model_train(self, return_sequences=False):
        pass

    @abstractmethod
    def _build_model_predict(self, return_sequences=True):
        pass

    def fit(self, X_train, y_train, plot=True, plot_title='',
            weights=None, callbacks=[], verbose_fit=None, validation_data=None, *args, **kwargs):
        # verbose_fit argument is for printing out an update after each epoch, which gets very long
        # These print statements at the top could be turned off with a verbose argument, but then
        # there would be a bunch of different verbose params
        print(f"Training simple RNN with params: {self.params}")
        X_train, y_train = self.format_train_data(X_train, y_train)
        print(f"X_train hash: {hash2(X_train)}")
        print(f"y_train hash: {hash2(y_train)}")
        if validation_data is not None:
            X_val, y_val = self.format_train_data(validation_data[0], validation_data[1])
            print(f"X_val hash: {hash2(X_val)}")
            print(f"y_val hash: {hash2(y_val)}")
        print(f"Initial weights before training hash: {hash2(self.model_train.get_weights())}")
        # Setup callbacks
        if self.params["reset_states"]:
            callbacks = callbacks + [ResetStatesCallback()]

        # Note: we overload the params here so that verbose_fit can be easily turned on/off at the .fit call
        if verbose_fit is None:
            verbose_fit = self.params['verbose_fit']
        # Evaluate Model once to set nonzero initial state
        if self.params["batch_size"] >= X_train.shape[0]:
            self.model_train(X_train)
        if validation_data is not None:
            history = self.model_train.fit(
                X_train, y_train + self.params['centering'][1],
                epochs=self.params['epochs'],
                batch_size=self.params['batch_size'],
                callbacks=callbacks,
                verbose=verbose_fit,
                validation_data=(X_val, y_val),
                *args, **kwargs
            )
        else:
            history = self.model_train.fit(
                X_train, y_train + self.params['centering'][1],
                epochs=self.params['epochs'],
                batch_size=self.params['batch_size'],
                callbacks=callbacks,
                verbose=verbose_fit,
                *args, **kwargs
            )
        if plot:
            self.plot_history(history, plot_title)
        if self.params["verbose_weights"]:
            print(f"Fitted Weights Hash: {hash2(self.model_train.get_weights())}")

        # Update Weights for Prediction Model
        w_fitted = self.model_train.get_weights()
        self.model_predict.set_weights(w_fitted)

    def predict(self, X_test):
        print("Predicting with simple RNN")
        X_test = self.format_pred_data(X_test)
        preds = self.model_predict.predict(X_test).flatten()
        return preds

    def format_train_data(self, X, y, verbose=False):
        X, y = staircase_2(X, y, timesteps=self.params["timesteps"], batch_size=self.params["batch_size"], verbose=verbose)
        return X, y

    def format_pred_data(self, X):
        return np.reshape(X, (1, X.shape[0], self.params['features']))

    def plot_history(self, history, plot_title):
        plt.figure()
        plt.semilogy(history.history['loss'], label='Training loss')
        if 'val_loss' in history.history:
            plt.semilogy(history.history['val_loss'], label='Validation loss')
        plt.title(f'{plot_title} Model loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.legend(loc='upper left')
        plt.show()

    def run_model(self, dict0):
        # Make copy to prevent changing in place
        dict1 = copy.deepcopy(dict0)
        # Extract Fields
        X_train, y_train, X_test, y_test = dict1['X_train'].copy(), dict1['y_train'].copy(), dict1["X_test"].copy(), dict1['y_test'].copy()
        if 'X_val' in dict1:
            X_val, y_val = dict1['X_val'].copy(), dict1['y_val'].copy()
        else:
            X_val = None
        case_id = dict1['case']

        # Fit model
        if X_val is None:
            self.fit(X_train, y_train)
        else:
            self.fit(X_train, y_train, validation_data=(X_val, y_val))
        # Generate Predictions:
        # run through training to get the hidden state set properly for the forecast period
        if X_val is None:
            X = np.concatenate((X_train, X_test))
            y = np.concatenate((y_train, y_test)).flatten()
        else:
            X = np.concatenate((X_train, X_val, X_test))
            y = np.concatenate((y_train, y_val, y_test)).flatten()
        # Predict
        print(f"Predicting Training through Test \n features hash: {hash2(X)} \n response hash: {hash2(y)} ")
        m = self.predict(X).flatten()
        dict1['m'] = m
        dict0['m'] = m  # add to outside env dictionary, should be the only place this happens
        if self.params['scale']:
            print(f"Rescaling data using {self.params['scaler']}")
            if self.params['scaler'] == "reproducibility":
                m *= self.params['scale_fm']
                y *= self.params['scale_fm']
                y_train *= self.params['scale_fm']
                y_test *= self.params['scale_fm']
        # Check Reproducibility. TODO: the old dict calls it hidden_units not rnn_units, so this doesn't check that
        if (case_id == "reproducibility") and compare_dicts(self.params, repro_hashes['params'], ['epochs', 'batch_size', 'scale', 'activation', 'learning_rate']):
            print("Checking Reproducibility")
            checkm = m[350]
            hv = hash2(self.model_predict.get_weights())
            if self.params['phys_initialize']:
                hv5 = repro_hashes['phys_initialize']['fitted_weight_hash']
                mv = repro_hashes['phys_initialize']['predictions_hash']
            else:
                hv5 = repro_hashes['rand_initialize']['fitted_weight_hash']
                mv = repro_hashes['rand_initialize']['predictions_hash']

            print(f"Fitted weights hash (check 5): {hv} \n Reproducibility weights hash: {hv5} \n Error: {hv5-hv}")
            print(f"Model predictions hash: {checkm} \n Reproducibility preds hash: {mv} \n Error: {mv-checkm}")

        # print(dict1.keys())
        # Plot final fit and data
        # TODO: make plot_data specific to this context
        dict1['y'] = y
        plot_data(dict1, title="RNN", title2=dict1['case'])

        # Calculate Errors
        err = rmse(m, y)
        train_ind = dict1["train_ind"]  # index of final training set value
        test_ind = dict1["test_ind"]    # index of first test set value
        err_train = rmse(m[:train_ind], y_train.flatten())
        err_pred = rmse(m[test_ind:], y_test.flatten())
        rmse_dict = {
            'all': err,
            'training': err_train,
            'prediction': err_pred
        }
        return m, rmse_dict
class ResetStatesCallback(Callback):
    def on_epoch_end(self, epoch, logs=None):
        # Clear the hidden states of a stateful model at the end of each epoch
        self.model.reset_states()
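
# Minimal sketch, for illustration only, of ResetStatesCallback used directly with a tiny
# stateful SimpleRNN fit on random data. The shapes and unit counts are arbitrary example
# values; the point is only that the hidden states are cleared after every epoch.
def _demo_reset_states_callback():
    batch_size, timesteps, features = 4, 3, 2
    inputs = tf.keras.Input(batch_shape=(batch_size, timesteps, features))
    x = SimpleRNN(5, stateful=True)(inputs)
    outputs = Dense(1)(x)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(loss='mean_squared_error', optimizer='adam')
    X = np.random.rand(2 * batch_size, timesteps, features)
    y = np.random.rand(2 * batch_size, 1)
    model.fit(X, y, epochs=2, batch_size=batch_size, verbose=0,
              callbacks=[ResetStatesCallback()])
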
class RNN(RNNModel):
    def __init__(self, params, loss='mean_squared_error'):
        super().__init__(params)
        self.model_train = self._build_model_train()
        self.model_predict = self._build_model_predict()

    def _build_model_train(self, return_sequences=False):
        inputs = tf.keras.Input(batch_shape=self.params['batch_shape'])
        x = inputs
        for i in range(self.params['rnn_layers']):
            x = SimpleRNN(
                units=self.params['rnn_units'],
                activation=self.params['activation'][0],
                dropout=self.params["dropout"][0],
                stateful=self.params['stateful'],
                return_sequences=return_sequences)(x)
        if self.params["dropout"][1] > 0:
            x = Dropout(self.params["dropout"][1])(x)
        for i in range(self.params['dense_layers']):
            x = Dense(self.params['dense_units'], activation=self.params['activation'][1])(x)
        model = tf.keras.Model(inputs=inputs, outputs=x)
        optimizer = tf.keras.optimizers.Adam(learning_rate=self.params['learning_rate'])
        model.compile(loss='mean_squared_error', optimizer=optimizer)

        if self.params["verbose_weights"]:
            print(f"Initial Weights Hash: {hash2(model.get_weights())}")
        return model

    def _build_model_predict(self, return_sequences=True):
        inputs = tf.keras.Input(shape=(None, self.params['features']))
        x = inputs
        for i in range(self.params['rnn_layers']):
            x = SimpleRNN(self.params['rnn_units'], activation=self.params['activation'][0],
                          stateful=False, return_sequences=return_sequences)(x)
        for i in range(self.params['dense_layers']):
            x = Dense(self.params['dense_units'], activation=self.params['activation'][1])(x)
        model = tf.keras.Model(inputs=inputs, outputs=x)
        optimizer = tf.keras.optimizers.Adam(learning_rate=self.params['learning_rate'])
        model.compile(loss='mean_squared_error', optimizer=optimizer)

        # Copy weights from model_train
        w_fitted = self.model_train.get_weights()
        model.set_weights(w_fitted)

        return model
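
# Minimal sketch, for illustration only, of building an RNN from a small params dictionary
# and running the fit/predict pair on random data. The parameter values are made-up
# placeholders chosen only so that the shapes are consistent; real runs build these
# through create_rnn_data2.
def _demo_rnn():
    features, timesteps, batch_size, hours = 2, 5, 8, 200
    params = {
        'batch_shape': (batch_size, timesteps, features),
        'features': features,
        'timesteps': timesteps,
        'batch_size': batch_size,
        'rnn_layers': 1, 'rnn_units': 6,
        'dense_layers': 1, 'dense_units': 1,
        'activation': ['linear', 'linear'],
        'dropout': [0.0, 0.0],
        'stateful': True,
        'learning_rate': 0.001,
        'epochs': 2,
        'centering': [0.0, 0.0],
        'reset_states': True,
        'verbose_fit': 0,
        'verbose_weights': False,
    }
    mod = RNN(params)
    X = np.random.rand(hours, features)
    y = np.random.rand(hours, 1)
    mod.fit(X, y, plot=False)
    preds = mod.predict(X)   # hidden state is carried over the whole series
    return preds
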
class RNN_LSTM(RNNModel):
    def __init__(self, params, loss='mean_squared_error'):
        super().__init__(params)
        self.model_train = self._build_model_train()
        self.model_predict = self._build_model_predict()

    def _build_model_train(self, return_sequences=False):
        inputs = tf.keras.Input(batch_shape=self.params['batch_shape'])
        x = inputs
        for i in range(self.params['rnn_layers']):
            x = LSTM(
                units=self.params['rnn_units'],
                activation=self.params['activation'][0],
                dropout=self.params["dropout"][0],
                recurrent_activation=self.params["recurrent_activation"],
                stateful=self.params['stateful'],
                return_sequences=return_sequences)(x)
        if self.params["dropout"][1] > 0:
            x = Dropout(self.params["dropout"][1])(x)
        for i in range(self.params['dense_layers']):
            x = Dense(self.params['dense_units'], activation=self.params['activation'][1])(x)
        model = tf.keras.Model(inputs=inputs, outputs=x)
        optimizer = tf.keras.optimizers.Adam(learning_rate=self.params['learning_rate'])
        model.compile(loss='mean_squared_error', optimizer=optimizer)

        if self.params["verbose_weights"]:
            print(f"Initial Weights Hash: {hash2(model.get_weights())}")
        return model

    def _build_model_predict(self, return_sequences=True):
        inputs = tf.keras.Input(shape=(None, self.params['features']))
        x = inputs
        for i in range(self.params['rnn_layers']):
            x = LSTM(
                units=self.params['rnn_units'],
                activation=self.params['activation'][0],
                stateful=False, return_sequences=return_sequences)(x)
        for i in range(self.params['dense_layers']):
            x = Dense(self.params['dense_units'], activation=self.params['activation'][1])(x)
        model = tf.keras.Model(inputs=inputs, outputs=x)
        optimizer = tf.keras.optimizers.Adam(learning_rate=self.params['learning_rate'])
        model.compile(loss='mean_squared_error', optimizer=optimizer)

        # Copy weights from model_train
        w_fitted = self.model_train.get_weights()
        model.set_weights(w_fitted)

        return model