Add validation data compatibility to run_model top level command
[notebooks.git] / fmda / moisture_rnn.py
# Environment
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import sys
import copy
import logging
from abc import ABC, abstractmethod
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.layers import Input, SimpleRNN, Dropout, Dense
from sklearn.metrics import mean_squared_error
# Local modules
import reproducibility
from utils import print_dict_summary, hash2
from data_funcs import load_and_fix_data, rmse, plot_data, compare_dicts

def staircase(x, y, timesteps, datapoints, return_sequences=False, verbose=False):
    # x [datapoints, features]  all inputs
    # y [datapoints, outputs]
    # timesteps: split x and y into samples of length timesteps, shifted by 1
    # datapoints: number of timesteps to use for training, no more than y.shape[0]
    print('staircase: shape x = ', x.shape)
    print('staircase: shape y = ', y.shape)
    print('staircase: timesteps=', timesteps)
    print('staircase: datapoints=', datapoints)
    print('staircase: return_sequences=', return_sequences)
    outputs = y.shape[1]
    features = x.shape[1]
    samples = datapoints - timesteps + 1
    print('staircase: samples=', samples, 'timesteps=', timesteps, 'features=', features)
    x_train = np.empty([samples, timesteps, features])
    if return_sequences:
        print('returning all timesteps in a sample')
        y_train = np.empty([samples, timesteps, outputs])  # all timesteps
        for i in range(samples):
            for k in range(timesteps):
                x_train[i, k, :] = x[i + k, :]
                y_train[i, k, :] = y[i + k, :]
    else:
        print('returning only the last timestep in a sample')
        y_train = np.empty([samples, outputs])
        for i in range(samples):
            for k in range(timesteps):
                x_train[i, k, :] = x[i + k, :]
            y_train[i, :] = y[i + timesteps - 1, :]
    return x_train, y_train
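
# Usage sketch (kept as a comment so importing this module has no side effects):
# with 10 datapoints, 2 features and timesteps=3 there are 10-3+1 = 8 overlapping samples.
#
#   x = np.arange(20.0).reshape(10, 2)
#   y = np.arange(10.0).reshape(10, 1)
#   x_train, y_train = staircase(x, y, timesteps=3, datapoints=10)
#   # x_train.shape == (8, 3, 2), y_train.shape == (8, 1)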

def staircase_2(x, y, timesteps, batch_size=None, trainsteps=np.inf, return_sequences=False, verbose=False):
    # create RNN training data in multiple batches
    # input:
    #     x (,features)
    #     y (,outputs)
    #     timesteps: split x and y into sequences of length timesteps
    #                a.k.a. lookback or sequence_length
    # check arguments and print params
    if batch_size is None:
        raise ValueError('staircase_2 requires batch_size')
    print('staircase_2: shape x = ', x.shape)
    print('staircase_2: shape y = ', y.shape)
    print('staircase_2: timesteps=', timesteps)
    print('staircase_2: batch_size=', batch_size)
    print('staircase_2: return_sequences=', return_sequences)

    nx, features = x.shape
    ny, outputs = y.shape
    datapoints = min(nx, ny, trainsteps)
    print('staircase_2: datapoints=', datapoints)
    # sequence j in a given batch is assumed to be the continuation of sequence j in the previous batch
    # https://www.tensorflow.org/guide/keras/working_with_rnns  Cross-batch statefulness

    # example with timesteps=3 batch_size=3 datapoints=15
    #     batch 0: [0 1 2]     [1 2 3]      [2 3 4]
    #     batch 1: [3 4 5]     [4 5 6]      [5 6 7]
    #     batch 2: [6 7 8]     [7 8 9]      [8 9 10]
    #     batch 3: [9 10 11]   [10 11 12]   [11 12 13]
    #     batch 4: [12 13 14]  [13 14 15]   when the data runs out this last batch can be shorter

    # TODO: implement for multiple locations, same starting time for each batch
    #              Loc 1        Loc 2        Loc 3
    #     batch 0: [0 1 2]     [0 1 2]      [0 1 2]
    #     batch 1: [3 4 5]     [3 4 5]      [3 4 5]
    #     batch 2: [6 7 8]     [6 7 8]      [6 7 8]
    # TODO: on the second epoch, shift the starting time of batch 0 in time

    # TODO: implement for multiple locations, different starting times for each batch
    #              Loc 1        Loc 2        Loc 3
    #     batch 0: [0 1 2]     [1 2 3]      [2 3 4]
    #     batch 1: [3 4 5]     [4 5 6]      [5 6 7]
    #     batch 2: [6 7 8]     [7 8 9]      [8 9 10]

    # the first sample in batch j starts at timesteps*j and ends at timesteps*(j+1)-1
    # e.g. the final hidden state of the rnn after the sequence of steps [0 1 2] in batch 0
    # becomes the starting hidden state of the rnn for the sequence of steps [3 4 5] in batch 1, etc.

    # sample [0 1 2] means the rnn is used twice to map state 0 -> 1 -> 2
    # the state at time 0 is fixed, but the state is considered a variable at times 1 and 2
    # the loss is computed from the output at time 2, and the chain rule for its gradient ends
    # at time 0 because the state there is a constant -> derivative is zero
    # sample [3 4 5] means the rnn is used twice to map state 3 -> 4 -> 5
    # the state at time 3 is fixed to the output of the first sequence [0 1 2]
    # the loss is computed from the output at time 5, and the chain rule for its gradient ends
    # at time 3 because the state there is considered constant -> derivative is zero
    # how is the gradient computed? presumably keras adds the gradients wrt the weights
    # at steps 2 5 8 ..., 3 6 9 ..., 4 7 ... and uses the sum to update the weights
    # there is only one set of weights: h(2) = f(h(1),w), h(1) = f(h(0),w), but w is always the same
    # each column is one successive evaluation of h(n+1) = f(h(n),w) for n = n_start, n_start+1, ...
    # a single column cannot be evaluated efficiently on a gpu because a gpu is a parallel processor
    # think of each column as served by one thread; the threads are independent because they
    # execute in parallel, and there needs to be a large number of threads (32 is a good number)
    # each batch consists of independent calculations
    # but it can depend on the result of the previous batch (that's the recurrent part)
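
    # In Keras, this cross-batch statefulness corresponds to building the network on a
    # fixed batch shape with stateful RNN layers (see _build_model_train below), e.g.:
    #     inputs = tf.keras.Input(batch_shape=(batch_size, timesteps, features))
    #     x = SimpleRNN(units, stateful=True)(inputs)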
    max_batches = datapoints // timesteps
    max_sequences = max_batches * batch_size

    print('staircase_2: max_batches=', max_batches)
    print('staircase_2: max_sequences=', max_sequences)

    x_train = np.zeros((max_sequences, timesteps, features))
    if return_sequences:
        y_train = np.empty((max_sequences, timesteps, outputs))
    else:
        y_train = np.empty((max_sequences, outputs))

    # build the sequences
    k = 0  # number of sequences generated so far
    for i in range(max_batches):
        for j in range(batch_size):
            begin = i * timesteps + j
            next = begin + timesteps
            if next > datapoints:
                break
            if verbose:
                print('sequence', k, 'batch', i, 'sample', j, 'data', begin, 'to', next - 1)
            x_train[k, :, :] = x[begin:next, :]
            if return_sequences:
                y_train[k, :, :] = y[begin:next, :]
            else:
                y_train[k, :] = y[next - 1, :]
            k += 1

    print('staircase_2: shape x_train = ', x_train.shape)
    print('staircase_2: shape y_train = ', y_train.shape)
    print('staircase_2: sequences generated', k)
    print('staircase_2: batch_size=', batch_size)
    k = (k // batch_size) * batch_size
    print('staircase_2: removing partial and empty batches at the end, keeping', k)
    x_train = x_train[:k, :, :]
    if return_sequences:
        y_train = y_train[:k, :, :]
    else:
        y_train = y_train[:k, :]

    print('staircase_2: shape x_train = ', x_train.shape)
    print('staircase_2: shape y_train = ', y_train.shape)

    return x_train, y_train
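
# Usage sketch (kept as a comment so importing this module has no side effects):
# this reproduces the timesteps=3, batch_size=3, datapoints=15 walkthrough above.
# 13 sequences are generated; the trailing partial batch is dropped, keeping 12.
#
#   x = np.arange(15.0).reshape(15, 1)
#   y = np.arange(15.0).reshape(15, 1)
#   x_train, y_train = staircase_2(x, y, timesteps=3, batch_size=3)
#   # x_train.shape == (12, 3, 1), y_train.shape == (12, 1)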

def create_rnn_data2(dict1, params, atm_dict="HRRR", verbose=False, train_ind=None, test_ind=None, scaler=None):
    # Given fmda data and hyperparameters, return a formatted dictionary to be used in RNN
    # Inputs:
    #     dict1: (dict) fmda dictionary
    #     params: (dict) hyperparameters; params['train_frac'] and params['val_frac'] give the
    #         fractions of data used for training (starting from time 0) and validation
    #         (starting from the end of train)
    #     atm_dict: (str) name of the subdictionary for atmospheric vars
    #     train_ind, test_ind: (int) optional indices of the last training observation and the
    #         first test observation; computed from the fractions above when not supplied
    # Returns: (dict) formatted data used in RNN
    logging.info('create_rnn_data2 start')
    # Copy dictionary to prevent changes in place
    d = copy.deepcopy(dict1)
    scale = params['scale']
    features_list = params["features_list"]
    # Scale data if required
    # TODO: reconcile scaling with moisture_rnn_pkl
    scale_fm = 1.0  # default, so scale_fm is defined even when scaling is off
    if scale:
        scale = 1
        if d['case'] == "reproducibility":
            scale_fm = 17.076346687085564
            scaler = 'reproducibility'
        else:
            scaler = None
    # Extract response vector
    fm = d['y']
    y = np.reshape(fm, [fm.shape[0], 1])
    # Extract features matrix
    X = d['X']

    # Check total observed hours
    hours = d['hours']
    assert hours == y.shape[0]  # check that it matches the response

    logging.info('create_rnn_data: total_hours=%s', hours)
    logging.info('feature matrix X shape %s', np.shape(X))
    logging.info('target matrix y shape %s', np.shape(y))
    logging.info('features_list: %s', features_list)

    logging.info('splitting train/val/test')
    if train_ind is None:
        train_ind = round(hours * params['train_frac'])  # index of last training observation
    if test_ind is None:
        test_ind = train_ind + round(hours * params['val_frac'])  # index of first test observation; equals train_ind if there is no validation data
    logging.info('Final index of training data=%s', train_ind)
    logging.info('First index of test data=%s', test_ind)
    # Training data from 0 to train_ind
    X_train = X[:train_ind]
    y_train = y[:train_ind].reshape(-1, 1)
    # Validation data from train_ind to test_ind
    X_val = X[train_ind:test_ind]
    y_val = y[train_ind:test_ind].reshape(-1, 1)
    # Test data from test_ind to end
    X_test = X[test_ind:]
    y_test = y[test_ind:].reshape(-1, 1)

    logging.info('X_train shape=%s', X_train.shape)
    logging.info('y_train shape=%s', y_train.shape)
    if test_ind == train_ind:
        logging.info('No validation data')
    elif X_val.shape[0] != 0:
        logging.info('X_val shape=%s', X_val.shape)
        logging.info('y_val shape=%s', y_val.shape)
    logging.info('X_test shape=%s', X_test.shape)
    logging.info('y_test shape=%s', y_test.shape)

    # Set up return dictionary
    rnn_dat = {
        'case': d['case'],
        'hours': hours,
        'features_list': features_list,
        'features': len(features_list),
        'scaler': scaler,
        'train_ind': train_ind,
        'test_ind': test_ind,
        'X': X,
        'y': y,
        'X_train': X_train,
        'y_train': y_train,
        'X_test': X_test,
        'y_test': y_test
    }
    if X_val.shape[0] > 0:
        rnn_dat.update({
            'X_val': X_val,
            'y_val': y_val
        })

    # Update RNN params using data attributes
    logging.info('Updating model params based on data')
    timesteps = params['timesteps']
    batch_size = params['batch_size']
    logging.info('batch_size=%s', batch_size)
    logging.info('timesteps=%s', timesteps)
    features = len(features_list)
    params.update({
        'features': features,
        'batch_shape': (params["batch_size"], params["timesteps"], features),
        'pred_input_shape': (hours, features),
        'scaler': scaler,
        'scale_fm': scale_fm
    })
    rnn_dat.update({'scaler': scaler, 'scale_fm': scale_fm})

    logging.info('create_rnn_data2 done')
    return rnn_dat
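
# Usage sketch (comment only; the case dictionary and parameter values are illustrative,
# a real fmda case dict needs at least the keys 'X', 'y', 'hours' and 'case'):
#
#   params = {'scale': 0, 'features_list': ['Ed', 'Ew', 'rain'],
#             'train_frac': 0.5, 'val_frac': 0.1, 'timesteps': 5, 'batch_size': 32}
#   case = {'case': 'demo', 'hours': 100,
#           'X': np.random.rand(100, 3), 'y': np.random.rand(100)}
#   rnn_dat = create_rnn_data2(case, params)
#   # rnn_dat contains X_train/y_train, X_val/y_val (when val_frac > 0) and X_test/y_test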

# Old version, superseded by create_rnn_data2 above; retained for reference.
# def create_rnn_data(dict1, params, atm_dict="HRRR", verbose=False, train_ind=None, test_ind=None, scaler=None):
#     # Given fmda data and hyperparameters, return formatted dictionary to be used in RNN
#     # Inputs:
#     #     d: (dict) fmda dictionary
#     #     params: (dict) hyperparameters
#     #     atm_dict: (str) string specifying name of subdictionary for atmospheric vars
#     #     train_frac: (float) fraction of data to use for training (starting from time 0)
#     #     val_frac: (float) fraction of data to use for validation data (starting from end of train)
#     # Returns: (dict) formatted data used in RNN
#     logging.info('create_rnn_data start')
#     # Copy Dictionary
#     d = copy.deepcopy(dict1)
#     scale = params['scale']
#     features_list = params["features_list"]
#
#     # Check if reproducibility case
#     if d['case'] == "reproducibility":
#         params.update({'scale': 1})
#         atm_dict = "RAWS"
#
#     # Scale Data if required
#     if scale:
#         scale = 1
#         if d['case'] == "reproducibility":
#             # Note: this was calculated from the max observed fm, Ed, Ew over a whole timeseries,
#             # originally using data from the test period
#             scale_fm = 17.076346687085564
#             logging.info("REPRODUCIBILITY scaling moisture features: using %s", scale_fm)
#             logging.info('create_rnn_data: scaling to range 0 to 1')
#             d[atm_dict]['Ed'] = d[atm_dict]['Ed'] / scale_fm
#             d[atm_dict]['Ew'] = d[atm_dict]['Ew'] / scale_fm
#             d[atm_dict]['fm'] = d[atm_dict]['fm'] / scale_fm
#             scaler = 'reproducibility'
#     else:
#         scale_fm = 1.0
#         scaler = None
#     # Extract desired features based on params, combine into matrix
#     fm = d[atm_dict]['fm']
#     values = [d[atm_dict][key] for key in features_list]
#     X = np.vstack(values).T
#     # Extract response vector
#     y = np.reshape(fm, [fm.shape[0], 1])
#     # Calculate total observed hours
#     hours = X.shape[0]
#     assert hours == y.shape[0]  # Check that it matches response
#
#     logging.info('create_rnn_data: total_hours=%s', hours)
#     logging.info('feature matrix X shape %s', np.shape(X))
#     logging.info('target matrix y shape %s', np.shape(y))
#     logging.info('features_list: %s', features_list)
#
#     logging.info('splitting train/val/test')
#     if train_ind is None:
#         train_ind = round(hours * params['train_frac'])  # index of last training observation
#     test_ind = train_ind + round(hours * params['val_frac'])  # index of first test observation; equals train_ind if there is no validation data
#     logging.info('Final index of training data=%s', train_ind)
#     logging.info('First index of test data=%s', test_ind)
#     # Training data from 0 to train_ind
#     X_train = X[:train_ind]
#     y_train = y[:train_ind].reshape(-1, 1)
#     # Validation data from train_ind to test_ind
#     X_val = X[train_ind:test_ind]
#     y_val = y[train_ind:test_ind].reshape(-1, 1)
#     # Test data from test_ind to end
#     X_test = X[test_ind:]
#     y_test = y[test_ind:].reshape(-1, 1)
#
#     logging.info('X_train shape=%s', X_train.shape)
#     logging.info('y_train shape=%s', y_train.shape)
#     if test_ind == train_ind:
#         logging.info('No validation data')
#     elif X_val.shape[0] != 0:
#         logging.info('X_val shape=%s', X_val.shape)
#         logging.info('y_val shape=%s', y_val.shape)
#     logging.info('X_test shape=%s', X_test.shape)
#     logging.info('y_test shape=%s', y_test.shape)
#
#     # Set up return dictionary
#     rnn_dat = {
#         'case': d['case'],
#         'hours': hours,
#         'features_list': features_list,
#         'features': len(features_list),
#         'scaler': scaler,
#         'train_ind': train_ind,
#         'test_ind': test_ind,
#         'X': X,
#         'y': y,
#         'X_train': X_train,
#         'y_train': y_train,
#         'X_test': X_test,
#         'y_test': y_test
#     }
#     if rnn_dat['scaler'] == "reproducibility":
#         rnn_dat['scale_fm'] = 17.076346687085564
#     if X_val.shape[0] > 0:
#         rnn_dat.update({
#             'X_val': X_val,
#             'y_val': y_val
#         })
#
#     # Update RNN params using data attributes
#     logging.info('Updating model params based on data')
#     timesteps = params['timesteps']
#     batch_size = params['batch_size']
#     logging.info('batch_size=%s', batch_size)
#     logging.info('timesteps=%s', timesteps)
#     features = len(features_list)
#     params.update({
#         'features': features,
#         'batch_shape': (params["batch_size"], params["timesteps"], features),
#         'pred_input_shape': (hours, features),
#         'scaler': scaler
#     })
#     if params['scaler'] == "reproducibility":
#         params['scale_fm'] = 17.076346687085564
#
#     logging.info('create_rnn_data done')
#     return rnn_dat

# Reference hashes and hyperparameters for the reproducibility test case,
# checked against fitted results in RNNModel.run_model below.
repro_hashes = {
    'phys_initialize': {
        'fitted_weight_hash': 4.2030588308041834e+19,
        'predictions_hash': 3.59976005554199219
    },
    'rand_initialize': {
        'fitted_weight_hash': 4.4965532557938975e+19,
        'predictions_hash': 3.71594738960266113
    },
    'params': {
        'id': 0,
        'purpose': 'reproducibility',
        'batch_size': 32,
        'training': 5,
        'cases': ['case11'],
        'scale': 1,               # every feature in [0, scale]
        'rain_do': True,
        'verbose': False,
        'timesteps': 5,
        'activation': ['linear', 'linear'],
        'hidden_units': 20,
        'dense_units': 1,         # do not change
        'dense_layers': 1,        # do not change
        'centering': [0.0, 0.0],  # should be activation at 0
        'DeltaE': [0, -1],        # bias correction
        'synthetic': False,       # run also synthetic cases
        'T1': 0.1,                # 1/fuel class (10)
        'fm_raise_vs_rain': 0.2,  # fm increase per mm rain
        'train_frac': 0.5,        # time fraction to spend on training
        'epochs': 200,
        'verbose_fit': 0,
        'verbose_weights': False,
        'initialize': True,
        'learning_rate': 0.001    # default learning rate
    }
}

class RNNModel(ABC):
    def __init__(self, params: dict):
        self.params = params
        if type(self) is RNNModel:
            raise TypeError("RNNModel is an abstract class and cannot be instantiated directly")
        super().__init__()

    @abstractmethod
    def fit(self, X_train, y_train, weights=None):
        pass

    @abstractmethod
    def predict(self, X):
        pass

    def run_model(self, dict0):
        # Make copy to prevent changing in place
        dict1 = copy.deepcopy(dict0)
        # Extract fields
        X_train, y_train, X_test, y_test = dict1['X_train'].copy(), dict1['y_train'].copy(), dict1["X_test"].copy(), dict1['y_test'].copy()
        if 'X_val' in dict1:
            X_val, y_val = dict1['X_val'].copy(), dict1['y_val'].copy()
        else:
            X_val = None
        case_id = dict1['case']
        # Fit model
        if X_val is None:
            self.fit(X_train, y_train)
        else:
            self.fit(X_train, y_train, validation_data=(X_val, y_val))
        # Generate predictions,
        # run through training to get the hidden state set properly for the forecast period
        if X_val is None:
            X = np.concatenate((X_train, X_test))
            y = np.concatenate((y_train, y_test)).flatten()
        else:
            X = np.concatenate((X_train, X_val, X_test))
            y = np.concatenate((y_train, y_val, y_test)).flatten()
        # Predict
        print(f"Predicting Training through Test \n features hash: {hash2(X)} \n response hash: {hash2(y)} ")
        m = self.predict(X).flatten()
        dict1['m'] = m
        dict0['m'] = m  # add to outside env dictionary, should be the only place this happens
        if self.params['scale']:
            print(f"Rescaling data using {self.params['scaler']}")
            if self.params['scaler'] == "reproducibility":
                m *= self.params['scale_fm']
                y *= self.params['scale_fm']
                y_train *= self.params['scale_fm']
                y_test *= self.params['scale_fm']
        # Check reproducibility. TODO: the old dict calls it hidden_units, not rnn_units, so this doesn't check that
        if (case_id == "reproducibility") and compare_dicts(self.params, repro_hashes['params'], ['epochs', 'batch_size', 'scale', 'activation', 'learning_rate']):
            print("Checking Reproducibility")
            checkm = m[350]
            hv = hash2(self.model_predict.get_weights())
            if self.params['phys_initialize']:
                hv5 = repro_hashes['phys_initialize']['fitted_weight_hash']
                mv = repro_hashes['phys_initialize']['predictions_hash']
            else:
                hv5 = repro_hashes['rand_initialize']['fitted_weight_hash']
                mv = repro_hashes['rand_initialize']['predictions_hash']
            print(f"Fitted weights hash (check 5): {hv}, Reproducibility weights hash: {hv5}, Error: {hv5-hv}")
            print(f"Model predictions hash: {checkm}, Reproducibility preds hash: {mv}, Error: {mv-checkm}")

        # Plot final fit and data
        # TODO: make plot_data specific to this context
        dict1['y'] = y
        plot_data(dict1, title="RNN", title2=dict1['case'])

        # Calculate errors
        err = rmse(m, y)
        train_ind = dict1["train_ind"]  # index of final training set value
        test_ind = dict1["test_ind"]    # index of first test set value
        err_train = rmse(m[:train_ind], y_train.flatten())
        err_pred = rmse(m[test_ind:], y_test.flatten())
        rmse_dict = {
            'all': err,
            'training': err_train,
            'prediction': err_pred
        }
        return rmse_dict
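
# Sketch of a minimal concrete subclass (illustrative only; any real subclass must
# implement fit and predict, as the RNN class below does):
#
#   class MeanModel(RNNModel):
#       def fit(self, X_train, y_train, weights=None, **kwargs):
#           self.mean = y_train.mean()          # "training" is just storing the mean
#       def predict(self, X):
#           return np.full(X.shape[0], self.mean)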

class ResetStatesCallback(Callback):
    def on_epoch_end(self, epoch, logs=None):
        self.model.reset_states()
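
# Usage sketch: pass an instance in the callbacks list so the hidden state of a
# stateful RNN is cleared between epochs, e.g.
#
#   model.fit(X_train, y_train, epochs=10, callbacks=[ResetStatesCallback()])
#
# RNN.fit below appends this callback automatically when params['reset_states'] is set.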

class RNN(RNNModel):
    def __init__(self, params, loss='mean_squared_error'):
        super().__init__(params)
        self.loss = loss
        self.model_train = self._build_model_train()
        self.model_predict = self._build_model_predict()

    def _build_model_train(self, return_sequences=False):
        inputs = tf.keras.Input(batch_shape=self.params['batch_shape'])
        x = inputs
        for i in range(self.params['rnn_layers']):
            x = SimpleRNN(
                units=self.params['rnn_units'],
                activation=self.params['activation'][0],
                dropout=self.params["dropout"][0],
                stateful=self.params['stateful'],
                # all but the last RNN layer must return sequences so stacked layers get 3-D input
                return_sequences=return_sequences or i < self.params['rnn_layers'] - 1)(x)
        if self.params["dropout"][1] > 0:
            x = Dropout(self.params["dropout"][1])(x)
        for i in range(self.params['dense_layers']):
            x = Dense(self.params['dense_units'], activation=self.params['activation'][1])(x)
        model = tf.keras.Model(inputs=inputs, outputs=x)
        optimizer = tf.keras.optimizers.Adam(learning_rate=self.params['learning_rate'])
        model.compile(loss=self.loss, optimizer=optimizer)

        if self.params["verbose_weights"]:
            print(f"Initial Weights Hash: {hash2(model.get_weights())}")
        return model

    def _build_model_predict(self, return_sequences=True):
        inputs = tf.keras.Input(shape=self.params['pred_input_shape'])
        x = inputs
        for i in range(self.params['rnn_layers']):
            x = SimpleRNN(self.params['rnn_units'], activation=self.params['activation'][0],
                          stateful=False, return_sequences=return_sequences)(x)
        for i in range(self.params['dense_layers']):
            x = Dense(self.params['dense_units'], activation=self.params['activation'][1])(x)
        model = tf.keras.Model(inputs=inputs, outputs=x)
        optimizer = tf.keras.optimizers.Adam(learning_rate=self.params['learning_rate'])
        model.compile(loss=self.loss, optimizer=optimizer)

        # Copy weights over from model_train
        w_fitted = self.model_train.get_weights()
        model.set_weights(w_fitted)

        return model

    def format_train_data(self, X, y, verbose=False):
        X, y = staircase_2(X, y, timesteps=self.params["timesteps"], batch_size=self.params["batch_size"], verbose=verbose)
        return X, y

    def format_pred_data(self, X):
        return np.reshape(X, (1, X.shape[0], self.params['features']))

    def fit(self, X_train, y_train, plot=True, plot_title='',
            weights=None, callbacks=None, verbose_fit=None, validation_data=None, *args, **kwargs):
        # verbose_fit argument is for printing out an update after each epoch, which gets very long.
        # The print statements at the top could be turned off with a verbose argument, but then
        # there would be a bunch of different verbose params.
        callbacks = [] if callbacks is None else list(callbacks)
        print(f"Training simple RNN with params: {self.params}")
        X_train, y_train = self.format_train_data(X_train, y_train)
        print(f"X_train hash: {hash2(X_train)}")
        print(f"y_train hash: {hash2(y_train)}")
        if validation_data is not None:
            X_val, y_val = self.format_train_data(validation_data[0], validation_data[1])
            print(f"X_val hash: {hash2(X_val)}")
            print(f"y_val hash: {hash2(y_val)}")
        print(f"Initial weights before training hash: {hash2(self.model_train.get_weights())}")
        # Set up callbacks
        if self.params["reset_states"]:
            callbacks = callbacks + [ResetStatesCallback()]

        # Note: we overload the params here so that verbose_fit can be easily turned on/off at the .fit call
        if verbose_fit is None:
            verbose_fit = self.params['verbose_fit']
        # Evaluate the model once to set a nonzero initial state
        if self.params["batch_size"] >= X_train.shape[0]:
            self.model_train(X_train)
        if validation_data is not None:
            history = self.model_train.fit(
                X_train, y_train + self.params['centering'][1],
                epochs=self.params['epochs'],
                batch_size=self.params['batch_size'],
                callbacks=callbacks,
                verbose=verbose_fit,
                validation_data=(X_val, y_val),
                *args, **kwargs
            )
        else:
            history = self.model_train.fit(
                X_train, y_train + self.params['centering'][1],
                epochs=self.params['epochs'],
                batch_size=self.params['batch_size'],
                callbacks=callbacks,
                verbose=verbose_fit,
                *args, **kwargs
            )
        if plot:
            self.plot_history(history, plot_title)
        if self.params["verbose_weights"]:
            print(f"Fitted Weights Hash: {hash2(self.model_train.get_weights())}")

        # Update weights for the prediction model
        w_fitted = self.model_train.get_weights()
        self.model_predict.set_weights(w_fitted)

    def predict(self, X_test):
        print("Predicting with simple RNN")
        X_test = self.format_pred_data(X_test)
        preds = self.model_predict.predict(X_test).flatten()
        return preds

    def plot_history(self, history, plot_title):
        plt.figure()
        plt.semilogy(history.history['loss'], label='Training loss')
        if 'val_loss' in history.history:
            plt.semilogy(history.history['val_loss'], label='Validation loss')
        plt.title(f'{plot_title} Model loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.legend(loc='upper left')
        plt.show()
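
# Minimal smoke test (a sketch, not part of the module API): builds an RNN on
# synthetic data with hypothetical hyperparameter values and checks that fit and
# predict run end to end. Guarded so importing this module has no side effects.
if __name__ == '__main__':
    demo_params = {
        'batch_size': 8, 'timesteps': 5, 'features': 2,
        'batch_shape': (8, 5, 2),       # (batch_size, timesteps, features)
        'pred_input_shape': (100, 2),   # (hours, features)
        'rnn_layers': 1, 'rnn_units': 6,
        'dense_layers': 1, 'dense_units': 1,
        'activation': ['linear', 'linear'], 'dropout': [0.0, 0.0],
        'stateful': True, 'learning_rate': 0.001,
        'epochs': 2, 'centering': [0.0, 0.0],
        'reset_states': True, 'verbose_fit': 0, 'verbose_weights': False,
    }
    rng = np.random.default_rng(0)
    X_demo = rng.random((100, 2))   # 100 hours, 2 features
    y_demo = rng.random((100, 1))   # 100 hours, 1 output
    model = RNN(demo_params)
    model.fit(X_demo, y_demo, plot=False)
    preds = model.predict(X_demo)
    print('demo predictions shape:', preds.shape)  # expect (100,)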