# numpy, copy, and logging are used throughout this module; import them explicitly
import numpy as np
import copy
import logging
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import Callback
from sklearn.metrics import mean_squared_error
from tensorflow.keras.layers import Input, SimpleRNN, Dropout, Dense
from abc import ABC, abstractmethod
import reproducibility
from utils import print_dict_summary, hash2
from data_funcs import load_and_fix_data, rmse, plot_data, compare_dicts

def staircase(x, y, timesteps, datapoints, return_sequences=False, verbose=False):
    # x [datapoints,features]  all inputs
    # y [datapoints,outputs]
    # timesteps: split x and y into samples of length timesteps, shifted by 1
    # datapoints: number of timesteps to use for training, no more than y.shape[0]
    print('staircase: shape x = ', x.shape)
    print('staircase: shape y = ', y.shape)
    print('staircase: timesteps=', timesteps)
    print('staircase: datapoints=', datapoints)
    print('staircase: return_sequences=', return_sequences)
    features = x.shape[1]
    outputs = y.shape[1]
    samples = datapoints - timesteps + 1
    print('staircase: samples=', samples, 'timesteps=', timesteps, 'features=', features)
    x_train = np.empty([samples, timesteps, features])
    if return_sequences:
        print('returning all timesteps in a sample')
        y_train = np.empty([samples, timesteps, outputs])  # all timesteps
        for i in range(samples):
            for k in range(timesteps):
                x_train[i, k, :] = x[i + k, :]
                y_train[i, k, :] = y[i + k, :]
    else:
        print('returning only the last timestep in a sample')
        y_train = np.empty([samples, outputs])
        for i in range(samples):
            for k in range(timesteps):
                x_train[i, k, :] = x[i + k, :]
            y_train[i, :] = y[i + timesteps - 1, :]
    return x_train, y_train
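
# A minimal sanity check for staircase(); hypothetical, added for illustration.
# With 10 datapoints and timesteps=3 there are samples = 10 - 3 + 1 = 8
# overlapping windows, each shifted by one step.
def _staircase_example():
    x = np.arange(20.0).reshape(10, 2)   # 10 datapoints, 2 features
    y = np.arange(10.0).reshape(10, 1)   # 10 datapoints, 1 output
    x_train, y_train = staircase(x, y, timesteps=3, datapoints=10)
    assert x_train.shape == (8, 3, 2)
    assert y_train.shape == (8, 1)       # default return_sequences=False
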
def staircase_2(x, y, timesteps, batch_size=None, trainsteps=np.inf, return_sequences=False, verbose=False):
    # create RNN training data in multiple batches
    # x [datapoints,features]  all inputs
    # y [datapoints,outputs]
    # timesteps: split x and y into sequences of length timesteps,
    #     a.k.a. lookback or sequence_length

    # print params if verbose
    if batch_size is None:
        raise ValueError('staircase_2 requires batch_size')
    print('staircase_2: shape x = ', x.shape)
    print('staircase_2: shape y = ', y.shape)
    print('staircase_2: timesteps=', timesteps)
    print('staircase_2: batch_size=', batch_size)
    print('staircase_2: return_sequences=', return_sequences)

    nx, features = x.shape
    ny, outputs = y.shape
    datapoints = min(nx, ny, trainsteps)
    print('staircase_2: datapoints=', datapoints)

    # Cross-batch statefulness: sequence j in a given batch is assumed to be
    # the continuation of sequence j in the previous batch.
    # https://www.tensorflow.org/guide/keras/working_with_rnns Cross-batch statefulness

    # example with timesteps=3 batch_size=3 datapoints=15
    #   batch 0: [0 1 2]    [1 2 3]    [2 3 4]
    #   batch 1: [3 4 5]    [4 5 6]    [5 6 7]
    #   batch 2: [6 7 8]    [7 8 9]    [8 9 10]
    #   batch 3: [9 10 11]  [10 11 12] [11 12 13]
    #   batch 4: [12 13 14] ...        when the data runs out, this last batch can be shorter

    # TODO: implement for multiple locations, same starting time for each batch
    #   batch 0: [0 1 2] [0 1 2] [0 1 2]
    #   batch 1: [3 4 5] [3 4 5] [3 4 5]
    #   batch 2: [6 7 8] [6 7 8] [6 7 8]
    # TODO: on the second epoch, shift the starting time of batch 0 in time

    # TODO: implement for multiple locations, different starting times for each batch
    #   batch 0: [0 1 2] [1 2 3] [2 3 4]
    #   batch 1: [3 4 5] [4 5 6] [5 6 7]
    #   batch 2: [6 7 8] [7 8 9] [8 9 10]

    # The first sample in batch j starts at timesteps*j and ends at timesteps*(j+1)-1.
    # E.g. the final hidden state of the RNN after the sequence of steps [0 1 2] in batch 0
    # becomes the starting hidden state of the RNN for the sequence of steps [3 4 5] in batch 1, etc.

    # Sample [0 1 2] means the RNN is used twice, to map state 0 -> 1 -> 2.
    # The state at time 0 is fixed, but the state is considered a variable at times 1 and 2.
    # The loss is computed from the output at time 2, and the gradient of the loss by the chain
    # rule ends at time 0, because the state there is a constant -> its derivative is zero.
    # Sample [3 4 5] means the RNN is used twice, to map state 3 -> 4 -> 5; the state at time 3
    # is fixed to the output of the first sequence [0 1 2].
    # The loss is computed from the output at time 5, and the gradient by the chain rule ends
    # at time 3, because the state there is considered constant -> its derivative is zero.
    # How is the gradient computed? Presumably Keras adds the gradients w.r.t. the weights at
    # steps 2 5 8 ..., 3 6 9 ..., 4 7 ..., and uses the sum to update the weights.
    # There is only one set of weights: h(2) = f(h(1),w), h(1) = f(h(0),w), but w is always the same.
    # Each column is one successive evaluation of h(n+1) = f(h(n),w) for n = n_start, n_start+1, ...
    # This recursion cannot be evaluated efficiently on a GPU, because a GPU is a parallel processor.
    # Think of each column as served by one thread; the threads are independent because they execute
    # in parallel, and there needs to be a large number of threads (32 is a good number).
    # Each batch consists of independent calculations,
    # but it can depend on the result of the previous batch (that's the recurrent part).

    max_batches = datapoints // timesteps
    max_sequences = max_batches * batch_size
    print('staircase_2: max_batches=', max_batches)
    print('staircase_2: max_sequences=', max_sequences)

    x_train = np.zeros((max_sequences, timesteps, features))
    if return_sequences:
        y_train = np.empty((max_sequences, timesteps, outputs))
    else:
        y_train = np.empty((max_sequences, outputs))

    # build the sequences
    k = 0
    for i in range(max_batches):
        for j in range(batch_size):
            begin = i * timesteps + j
            next = begin + timesteps
            if next > datapoints:
                break
            if verbose:
                print('sequence', k, 'batch', i, 'sample', j, 'data', begin, 'to', next - 1)
            x_train[k, :, :] = x[begin:next, :]
            if return_sequences:
                y_train[k, :, :] = y[begin:next, :]
            else:
                y_train[k, :] = y[next - 1, :]
            k += 1
    print('staircase_2: shape x_train = ', x_train.shape)
    print('staircase_2: shape y_train = ', y_train.shape)
    print('staircase_2: sequences generated', k)
    print('staircase_2: batch_size=', batch_size)
    k = (k // batch_size) * batch_size
    print('staircase_2: removing partial and empty batches at the end, keeping', k)
    x_train = x_train[:k, :, :]
    if return_sequences:
        y_train = y_train[:k, :, :]
    else:
        y_train = y_train[:k, :]

    print('staircase_2: shape x_train = ', x_train.shape)
    print('staircase_2: shape y_train = ', y_train.shape)
    return x_train, y_train
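
# A hypothetical check of the batch layout documented above: timesteps=3,
# batch_size=3, datapoints=15 gives max_batches=5, i.e. 15 candidate sequences;
# the last batch is incomplete (sequences 13 and 14 would run past the data),
# so after trimming to whole batches 12 sequences remain.
def _staircase_2_example():
    x = np.arange(30.0).reshape(15, 2)   # 15 datapoints, 2 features
    y = np.arange(15.0).reshape(15, 1)   # 15 datapoints, 1 output
    x_train, y_train = staircase_2(x, y, timesteps=3, batch_size=3)
    assert x_train.shape == (12, 3, 2)
    assert y_train.shape == (12, 1)
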
def create_rnn_data2(dict1, params, atm_dict="HRRR", verbose=False, train_ind=None, test_ind=None, scaler=None):
    # Given fmda data and hyperparameters, return formatted dictionary to be used in RNN
    # Inputs:
    # dict1: (dict) fmda dictionary
    # params: (dict) hyperparameters
    # atm_dict: (str) name of the subdictionary for atmospheric vars
    # train_frac: (float) fraction of data to use for training (starting from time 0)
    # val_frac: (float) fraction of data to use for validation data (starting from end of train)
    # Returns: (dict) formatted data used in RNN
    logging.info('create_rnn_data start')
    # Copy the input so it is not changed in place
    d = copy.deepcopy(dict1)
    scale = params['scale']
    features_list = params["features_list"]

    # Scale Data if required
    # TODO: Reconcile scaling with moisture_rnn_pkl
    scale_fm = 1.0  # NOTE: assumed default (no rescaling); overwritten for the reproducibility case
    if d['case'] == "reproducibility":
        scale_fm = 17.076346687085564
        scaler = 'reproducibility'

    # Extract desired features based on params, combine into matrix
    fm = d[atm_dict]['fm']
    values = [d[atm_dict][key] for key in features_list]
    X = np.vstack(values).T
    # Extract response vector
    y = np.reshape(fm, [fm.shape[0], 1])
    # Check total observed hours
    hours = X.shape[0]  # NOTE: assumed to be the length of the feature matrix
    assert hours == y.shape[0]  # Check that it matches response

    logging.info('create_rnn_data: total_hours=%s', hours)
    logging.info('feature matrix X shape %s', np.shape(X))
    logging.info('target matrix Y shape %s', np.shape(y))
    logging.info('features_list: %s', features_list)

    logging.info('splitting train/val/test')
    if train_ind is None:
        train_ind = round(hours * params['train_frac'])  # index of last training observation
    if test_ind is None:
        test_ind = train_ind + round(hours * params['val_frac'])  # index of first test observation; equals train_ind if there is no validation data
    logging.info('Final index of training data=%s', train_ind)
    logging.info('First index of Test data=%s', test_ind)
    # Training data from 0 to train_ind
    X_train = X[:train_ind]
    y_train = y[:train_ind].reshape(-1, 1)
    # Validation data from train_ind to test_ind
    X_val = X[train_ind:test_ind]
    y_val = y[train_ind:test_ind].reshape(-1, 1)
    # Test data from test_ind to end
    X_test = X[test_ind:]
    y_test = y[test_ind:].reshape(-1, 1)

    logging.info('X_train shape=%s', X_train.shape)
    logging.info('y_train shape=%s', y_train.shape)
    if test_ind == train_ind:
        logging.info('No validation data')
    elif X_val.shape[0] != 0:
        logging.info('X_val shape=%s', X_val.shape)
        logging.info('y_val shape=%s', y_val.shape)
    logging.info('X_test shape=%s', X_test.shape)
    logging.info('y_test shape=%s', y_test.shape)

    # Set up return dictionary
    # NOTE: reconstructed; keys beyond those visible in the original are inferred from run_model's usage
    rnn_dat = {
        'case': d['case'],
        'hours': hours,
        'features_list': features_list,
        'features': len(features_list),
        'train_ind': train_ind,
        'test_ind': test_ind,
        'X_train': X_train,
        'y_train': y_train,
        'X_test': X_test,
        'y_test': y_test
    }
    if X_val.shape[0] > 0:
        rnn_dat.update({'X_val': X_val, 'y_val': y_val})

    # Update RNN params using data attributes
    logging.info('Updating model params based on data')
    timesteps = params['timesteps']
    batch_size = params['batch_size']
    logging.info('batch_size=%s', batch_size)
    logging.info('timesteps=%s', timesteps)
    features = len(features_list)
    params.update({
        'features': features,
        'batch_shape': (params["batch_size"], params["timesteps"], features),
        'pred_input_shape': (hours, features)
    })
    rnn_dat.update({'scaler': scaler, 'scale_fm': scale_fm})

    logging.info('create_rnn_data2 done')
    return rnn_dat
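
# A hypothetical minimal use of create_rnn_data2; the fmda-style dictionary
# below is synthetic, and real inputs come from data_funcs.load_and_fix_data.
def _create_rnn_data2_example():
    h = 100
    d = {'case': 'example',
         'HRRR': {'fm': np.linspace(5.0, 25.0, h),
                  'Ed': np.linspace(6.0, 26.0, h),
                  'Ew': np.linspace(4.0, 24.0, h)}}
    params = {'scale': 0, 'features_list': ['Ed', 'Ew'],
              'train_frac': 0.5, 'val_frac': 0.1,
              'timesteps': 5, 'batch_size': 32}
    rnn_dat = create_rnn_data2(d, params)
    assert rnn_dat['X_train'].shape == (50, 2)   # first half used for training
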
# Hashed values for reproducibility checks. NOTE: the dictionary structure is
# reconstructed; only the values shown here were recovered from the original.
repro_hashes = {
    'phys_initialize': {
        'fitted_weight_hash': 4.2030588308041834e+19,
        'predictions_hash': 3.59976005554199219
    },
    'rand_initialize': {
        'fitted_weight_hash': 4.4965532557938975e+19,
        'predictions_hash': 3.71594738960266113
    },
    'params': {
        'purpose': 'reproducibility',
        'scale': 1,  # every feature in [0, scale]
        'activation': ['linear', 'linear'],
        'dense_units': 1,  # do not change
        'dense_layers': 1,  # do not change
        'centering': [0.0, 0.0],  # should be activation at 0
        'DeltaE': [0, -1],  # bias correction
        'synthetic': False,  # run also synthetic cases
        'T1': 0.1,  # 1/fuel class (10)
        'fm_raise_vs_rain': 0.2,  # fm increase per mm rain
        'train_frac': 0.5,  # time fraction to spend on training
        'verbose_weights': False,
        'learning_rate': 0.001  # default learning rate
    }
}

class RNNModel(ABC):
    def __init__(self, params: dict):
        self.params = params
        if type(self) is RNNModel:
            raise TypeError("RNNModel is an abstract class and cannot be instantiated directly")
        super().__init__()

    @abstractmethod
    def fit(self, X_train, y_train, weights=None):
        pass

    @abstractmethod
    def predict(self, X):
        pass

    def run_model(self, dict0):
        # Make a copy to prevent changing in place
        dict1 = copy.deepcopy(dict0)
        # Extract fields
        X_train, y_train, X_test, y_test = dict1['X_train'].copy(), dict1['y_train'].copy(), dict1["X_test"].copy(), dict1['y_test'].copy()
        if 'X_val' in dict1:
            X_val, y_val = dict1['X_val'].copy(), dict1['y_val'].copy()
        case_id = dict1['case']

        # Fit the model, with validation data if available
        if 'X_val' not in dict1:
            self.fit(X_train, y_train)
        else:
            self.fit(X_train, y_train, validation_data=(X_val, y_val))

        # Generate predictions:
        # run through training to get the hidden state set properly for the forecast period
        if 'X_val' not in dict1:
            X = np.concatenate((X_train, X_test))
            y = np.concatenate((y_train, y_test)).flatten()
        else:
            X = np.concatenate((X_train, X_val, X_test))
            y = np.concatenate((y_train, y_val, y_test)).flatten()

        print(f"Predicting Training through Test \n features hash: {hash2(X)} \n response hash: {hash2(y)} ")
        m = self.predict(X).flatten()
        dict0['m'] = m  # add to the outside env dictionary; should be the only place this happens
        if self.params['scale']:
            print(f"Rescaling data using {self.params['scaler']}")
            if self.params['scaler'] == "reproducibility":
                m *= self.params['scale_fm']
                y *= self.params['scale_fm']
                y_train *= self.params['scale_fm']
                y_test *= self.params['scale_fm']
        # Check reproducibility. TODO: the old dict calls it hidden_units, not rnn_units, so this doesn't check that
        if (case_id == "reproducibility") and compare_dicts(self.params, repro_hashes['params'], ['epochs', 'batch_size', 'scale', 'activation', 'learning_rate']):
            print("Checking Reproducibility")
            checkm = hash2(m)  # NOTE: assumed; the original definition of checkm was not recovered
            hv = hash2(self.model_predict.get_weights())
            if self.params['phys_initialize']:
                hv5 = repro_hashes['phys_initialize']['fitted_weight_hash']
                mv = repro_hashes['phys_initialize']['predictions_hash']
            else:
                hv5 = repro_hashes['rand_initialize']['fitted_weight_hash']
                mv = repro_hashes['rand_initialize']['predictions_hash']
            print(f"Fitted weights hash (check 5): {hv}, Reproducibility weights hash: {hv5}, Error: {hv5-hv}")
            print(f"Model predictions hash: {checkm}, Reproducibility preds hash: {mv}, Error: {mv-checkm}")
        print("*******DEBUG*******")
        # print(dict1.keys())
        # Plot final fit and data
        # TODO: make plot_data specific to this context
        plot_data(dict1, title="RNN", title2=dict1['case'])

        # Calculate and return errors
        train_ind = dict1["train_ind"]  # index of final training set value
        test_ind = dict1["test_ind"]    # index of first test set value
        err_train = rmse(m[:train_ind], y_train.flatten())
        err_pred = rmse(m[test_ind:], y_test.flatten())
        rmse_dict = {
            'training': err_train,
            'prediction': err_pred
        }
        return m, rmse_dict
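
# Hypothetical usage sketch (added, names illustrative): given a formatted
# dictionary from create_rnn_data2 and a concrete subclass such as RNN below,
#     model = RNN(params)
#     m, errs = model.run_model(rnn_dat)
# returns the full prediction series m and a dict of train/test RMSE.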

class ResetStatesCallback(Callback):
    # Reset the hidden state of a stateful RNN at the end of every epoch, so
    # state carries across batches within an epoch but not across epochs.
    def on_epoch_end(self, epoch, logs=None):
        self.model.reset_states()

class RNN(RNNModel):
    # Concrete RNNModel built on Keras SimpleRNN layers (class name assumed)
    def __init__(self, params, loss='mean_squared_error'):
        super().__init__(params)
        self.model_train = self._build_model_train()
        self.model_predict = self._build_model_predict()

    def _build_model_train(self, return_sequences=False):
        # Stateful training model with a fixed batch shape
        inputs = tf.keras.Input(batch_shape=self.params['batch_shape'])
        x = inputs
        for i in range(self.params['rnn_layers']):
            x = SimpleRNN(
                units=self.params['rnn_units'],
                activation=self.params['activation'][0],
                dropout=self.params["dropout"][0],
                stateful=self.params['stateful'],
                return_sequences=return_sequences)(x)
        if self.params["dropout"][1] > 0:
            x = Dropout(self.params["dropout"][1])(x)
        for i in range(self.params['dense_layers']):
            x = Dense(self.params['dense_units'], activation=self.params['activation'][1])(x)
        model = tf.keras.Model(inputs=inputs, outputs=x)
        optimizer = tf.keras.optimizers.Adam(learning_rate=self.params['learning_rate'])
        model.compile(loss='mean_squared_error', optimizer=optimizer)

        if self.params["verbose_weights"]:
            print(f"Initial Weights Hash: {hash2(model.get_weights())}")
        return model

    def _build_model_predict(self, return_sequences=True):
        # Stateless prediction model that consumes the whole series as one sequence
        inputs = tf.keras.Input(shape=self.params['pred_input_shape'])
        x = inputs
        for i in range(self.params['rnn_layers']):
            x = SimpleRNN(self.params['rnn_units'], activation=self.params['activation'][0],
                          stateful=False, return_sequences=return_sequences)(x)
        for i in range(self.params['dense_layers']):
            x = Dense(self.params['dense_units'], activation=self.params['activation'][1])(x)
        model = tf.keras.Model(inputs=inputs, outputs=x)
        optimizer = tf.keras.optimizers.Adam(learning_rate=self.params['learning_rate'])
        model.compile(loss='mean_squared_error', optimizer=optimizer)

        # Set weights to model_train
        w_fitted = self.model_train.get_weights()
        model.set_weights(w_fitted)
        return model

    def format_train_data(self, X, y, verbose=False):
        X, y = staircase_2(X, y, timesteps=self.params["timesteps"], batch_size=self.params["batch_size"], verbose=verbose)
        return X, y

    def format_pred_data(self, X):
        return np.reshape(X, (1, X.shape[0], self.params['features']))
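
    # For example (added): with hours=100 and features=2, format_pred_data
    # turns X of shape (100, 2) into (1, 100, 2), matching 'pred_input_shape'
    # so the stateless model predicts the whole series in one call.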

    def fit(self, X_train, y_train, plot=True, plot_title='',
            weights=None, callbacks=[], verbose_fit=None, validation_data=None, *args, **kwargs):
        # The verbose_fit argument is for printing an update after each epoch, which gets very long.
        # The print statements at the top could be turned off with a verbose argument, but then
        # there would be a bunch of different verbose params.
        print(f"Training simple RNN with params: {self.params}")
        X_train, y_train = self.format_train_data(X_train, y_train)
        print(f"X_train hash: {hash2(X_train)}")
        print(f"y_train hash: {hash2(y_train)}")
        if validation_data is not None:
            X_val, y_val = self.format_train_data(validation_data[0], validation_data[1])
            print(f"X_val hash: {hash2(X_val)}")
            print(f"y_val hash: {hash2(y_val)}")
        print(f"Initial weights before training hash: {hash2(self.model_train.get_weights())}")

        if self.params["reset_states"]:
            callbacks = callbacks + [ResetStatesCallback()]

        # Note: we overload the params here so that verbose_fit can easily be turned on/off at the .fit call
        if verbose_fit is None:
            verbose_fit = self.params['verbose_fit']
        # Evaluate the model once to set a nonzero initial state
        if self.params["batch_size"] >= X_train.shape[0]:
            self.model_train(X_train)
        if validation_data is not None:
            history = self.model_train.fit(
                X_train, y_train + self.params['centering'][1],
                epochs=self.params['epochs'],
                batch_size=self.params['batch_size'],
                callbacks=callbacks,
                verbose=verbose_fit,
                validation_data=(X_val, y_val),
                *args, **kwargs
            )
        else:
            history = self.model_train.fit(
                X_train, y_train + self.params['centering'][1],
                epochs=self.params['epochs'],
                batch_size=self.params['batch_size'],
                callbacks=callbacks,
                verbose=verbose_fit,
                *args, **kwargs
            )
        if plot:
            self.plot_history(history, plot_title)
        if self.params["verbose_weights"]:
            print(f"Fitted Weights Hash: {hash2(self.model_train.get_weights())}")

        # Update weights for the prediction model
        w_fitted = self.model_train.get_weights()
        self.model_predict.set_weights(w_fitted)

    def predict(self, X_test):
        print("Predicting with simple RNN")
        X_test = self.format_pred_data(X_test)
        preds = self.model_predict.predict(X_test).flatten()
        return preds

    def plot_history(self, history, plot_title):
        plt.figure()
        plt.semilogy(history.history['loss'], label='Training loss')
        if 'val_loss' in history.history:
            plt.semilogy(history.history['val_loss'], label='Validation loss')
        plt.title(f'{plot_title} Model loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.legend(loc='upper left')
        plt.show()
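
# Hypothetical end-to-end sketch (added; parameter values are illustrative,
# not the project's defaults, and the pickle path and dict key are placeholders):
#     dat = load_and_fix_data('data.pickle')
#     params = {'scale': 0, 'features_list': ['Ed', 'Ew'],
#               'train_frac': 0.5, 'val_frac': 0.1,
#               'timesteps': 5, 'batch_size': 32,
#               'rnn_layers': 1, 'rnn_units': 6,
#               'dense_layers': 1, 'dense_units': 1,
#               'activation': ['linear', 'linear'], 'dropout': [0.0, 0.0],
#               'stateful': True, 'learning_rate': 0.001, 'epochs': 100,
#               'centering': [0.0, 0.0], 'reset_states': True,
#               'verbose_fit': False, 'verbose_weights': False}
#     rnn_dat = create_rnn_data2(dat['case_name'], params)
#     model = RNN(params)
#     m, errs = model.run_model(rnn_dat)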