2 import tensorflow
as tf
3 from keras
.models
import Sequential
4 from keras
.layers
import Dense
, SimpleRNN
5 # from keras.utils.vis_utils import plot_model
6 from keras
.utils
import plot_model
7 from sklearn
.preprocessing
import MinMaxScaler
8 from sklearn
.metrics
import mean_squared_error
10 import matplotlib
.pyplot
as plt
11 import tensorflow
as tf
12 import keras
.backend
as K
13 import tensorflow
as tf
15 from utils
import vprint
, hash2
, get_item
16 import reproducibility
17 from data_funcs
import check_data
, rmse_data
, plot_data
18 import moisture_models
as mod
22 def staircase(x
,y
,timesteps
,datapoints
,return_sequences
=False, verbose
= False):
23 # x [datapoints,features] all inputs
24 # y [datapoints,outputs]
25 # timesteps: split x and y into samples length timesteps, shifted by 1
26 # datapoints: number of timesteps to use for training, no more than y.shape[0]
27 vprint('staircase: shape x = ',x
.shape
)
28 vprint('staircase: shape y = ',y
.shape
)
29 vprint('staircase: timesteps=',timesteps
)
30 vprint('staircase: datapoints=',datapoints
)
31 vprint('staircase: return_sequences=',return_sequences
)
34 samples
= datapoints
-timesteps
+1
35 vprint('staircase: samples=',samples
,'timesteps=',timesteps
,'features=',features
)
36 x_train
= np
.empty([samples
, timesteps
, features
])
38 vprint('returning all timesteps in a sample')
39 y_train
= np
.empty([samples
, timesteps
, outputs
]) # all
40 for i
in range(samples
):
41 for k
in range(timesteps
):
42 x_train
[i
,k
,:] = x
[i
+k
,:]
43 y_train
[i
,k
,:] = y
[i
+k
,:]
45 vprint('returning only the last timestep in a sample')
46 y_train
= np
.empty([samples
, outputs
])
47 for i
in range(samples
):
48 for k
in range(timesteps
):
49 x_train
[i
,k
,:] = x
[i
+k
,:]
50 y_train
[i
,:] = y
[i
+timesteps
-1,:]
52 return x_train
, y_train
54 def staircase_2(x
,y
,timesteps
,batch_size
=None,trainsteps
=np
.inf
,return_sequences
=False, verbose
= True):
55 # create RNN training data in multiple batches
59 # timesteps: split x and y into sequences length timesteps
60 # a.k.a. lookback or sequence_length
62 # print params if verbose
64 if batch_size
is None:
65 raise ValueError('staircase_2 requires batch_size')
66 print('staircase_2: shape x = ',x
.shape
)
67 print('staircase_2: shape y = ',y
.shape
)
68 print('staircase_2: timesteps=',timesteps
)
69 print('staircase_2: batch_size=',batch_size
)
70 print('staircase_2: return_sequences=',return_sequences
)
74 datapoints
= min(nx
,ny
,trainsteps
)
75 print('staircase_2: datapoints=',datapoints
)
77 # sequence j in a given batch is assumed to be the continuation of sequence j in the previous batch
78 # https://www.tensorflow.org/guide/keras/working_with_rnns Cross-batch statefulness
80 # example with timesteps=3 batch_size=3 datapoints=10
81 # batch 0: [0 1 2] [1 2 3] [2 3 4]
82 # batch 1: [3 4 5] [4 5 6] [5 6 7]
83 # batch 2: [6 7 8] [7 8 9] when runs out this is the last batch, can be shorter
85 # the first sequence in batch j starts from timesteps*j and ends with timesteps*(j+1)-1
87 max_batches
= datapoints
// timesteps
88 max_sequences
= max_batches
* batch_size
90 print('staircase_2: max_batches=',max_batches
)
91 print('staircase_2: max_sequences=',max_sequences
)
93 x_train
= np
.zeros((max_sequences
, timesteps
, features
))
95 y_train
= np
.empty((max_sequences
, timesteps
, outputs
))
97 y_train
= np
.empty((max_sequences
, outputs
))
101 for i
in range(max_batches
):
102 for j
in range(batch_size
):
103 begin
= i
*timesteps
+ j
104 next
= begin
+ timesteps
105 if next
> datapoints
:
107 vprint('sequence',k
,'batch',i
,'sample',j
,'data',begin
,'to',next
-1)
108 x_train
[k
,:,:] = x
[begin
:next
,:]
110 y_train
[k
,:,:] = y
[begin
:next
,:]
112 y_train
[k
,:] = y
[next
-1,:]
115 print('staircase_2: shape x_train = ',x_train
.shape
)
116 print('staircase_2: shape y_train = ',y_train
.shape
)
117 print('staircase_2: sequences generated',k
)
118 print('staircase_2: batch_size=',batch_size
)
119 k
= (k
// batch_size
) * batch_size
120 print('staircase_2: removing partial and empty batches at the end, keeping',k
)
121 x_train
= x_train
[:k
,:,:]
123 y_train
= y_train
[:k
,:,:]
125 y_train
= y_train
[:k
,:]
127 print('staircase_2: shape x_train = ',x_train
.shape
)
128 print('staircase_2: shape y_train = ',y_train
.shape
)
130 return x_train
, y_train
132 def create_RNN_2(hidden_units
, dense_units
, activation
, stateful
=False,
133 batch_shape
=None, input_shape
=None, dense_layers
=1,
134 rnn_layers
=1,return_sequences
=False,
135 initial_state
=None, verbose
= True):
137 print("Function: moisture_rnn.create_RNN_2")
139 arg_dict
= locals().copy()
142 print(f
" {arg} = {arg_dict[arg]}")
144 inputs
= tf
.keras
.Input(batch_shape
=batch_shape
)
146 inputs
= tf
.keras
.Input(shape
=input_shape
)
147 # https://stackoverflow.com/questions/43448029/how-can-i-print-the-values-of-keras-tensors
149 for i
in range(rnn_layers
):
150 x
= tf
.keras
.layers
.SimpleRNN(hidden_units
,activation
=activation
[0],
151 stateful
=stateful
,return_sequences
=return_sequences
)(x
)
152 for i
in range(dense_layers
):
153 x
= tf
.keras
.layers
.Dense(dense_units
, activation
=activation
[1])(x
)
154 model
= tf
.keras
.Model(inputs
=inputs
, outputs
=x
)
155 model
.compile(loss
='mean_squared_error', optimizer
='adam')
158 def create_rnn_data(dat
, params
, hours
=None, h2
=None):
159 # Given fmda data and hyperparameters, return formatted dictionary to be used in RNN
161 # dat: (dict) fmda dictionary
162 # params: (dict) hyperparameters
163 # hours: (int) optional parameter to set total length of train+predict
164 # h2: (int) optional parameter to set as length of training period (h2 = total - hours)
165 # Returns: (dict) formatted datat used in RNN
166 timesteps
= params
['timesteps']
167 scale
= params
['scale']
168 rain_do
= params
['rain_do']
169 verbose
= params
['verbose']
170 batch_size
= params
['batch_size']
176 vprint('create_rnn_data: hours=',hours
,' h2=',h2
)
177 # extract inputs the windown of interest
184 # Average Equilibrium
185 # E = (Ed + Ew)/2 # why?
187 # Scale Data if required
189 print('scaling to range 0 to',scale
)
190 scale_fm
=max(max(Ew
),max(Ed
),max(fm
))/scale
191 scale_rain
=max(max(rain
),0.01)/scale
195 rain
= rain
/scale_rain
200 if params
['verbose_weights']:
201 print('scale_fm=',scale_fm
,'scale_rain=',scale_rain
)
203 # transform as 2D, (timesteps, features) and (timesteps, outputs)
206 Et
= np
.vstack((Ed
, Ew
, rain
)).T
207 features_list
= ['Ed', 'Ew', 'rain']
209 Et
= np
.vstack((Ed
, Ew
)).T
210 features_list
= ['Ed', 'Ew']
211 datat
= np
.reshape(fm
,[fm
.shape
[0],1])
214 print('batch_size=',batch_size
)
215 if batch_size
is None or batch_size
is np
.inf
:
216 x_train
, y_train
= staircase(Et
,datat
,timesteps
=timesteps
,datapoints
=h2
,
217 return_sequences
=False, verbose
= verbose
)
220 x_train
, y_train
= staircase_2(Et
,datat
,timesteps
,batch_size
,trainsteps
=h2
,
221 return_sequences
=False, verbose
= verbose
)
223 vprint('x_train shape=',x_train
.shape
)
224 vprint('y_train shape=',y_train
.shape
)
225 samples
, timesteps
, features
= x_train
.shape
227 h0
= tf
.convert_to_tensor(datat
[:samples
],dtype
=tf
.float32
)
229 # Set up return dictionary
237 'timesteps': timesteps
,
238 'features': features
,
244 'scale_rain':scale_rain
,
246 'features_list':features_list
252 def train_rnn_2(rnn_dat
, params
,hours
, fit
=True):
254 verbose
= params
['verbose']
257 hours
= rnn_dat
['hours']
259 samples
= rnn_dat
['samples']
260 features
= rnn_dat
['features']
261 timesteps
= rnn_dat
['timesteps']
262 centering
= params
['centering']
264 model_fit
=create_RNN_2(hidden_units
=params
['hidden_units'],
265 dense_units
=params
['dense_units'],
266 batch_shape
=(samples
,timesteps
,features
),
268 return_sequences
=False,
270 activation
=params
['activation'],
271 dense_layers
=params
['dense_layers'],
275 model_predict
=create_RNN_2(hidden_units
=params
['hidden_units'],
276 dense_units
=params
['dense_units'],
277 input_shape
=(hours
,features
),stateful
= False,
278 return_sequences
=True,
279 activation
=params
['activation'],
280 dense_layers
=params
['dense_layers'],
283 if verbose
: print(model_predict
.summary())
285 x_train
= rnn_dat
['x_train']
286 y_train
= rnn_dat
['y_train']
288 model_fit(x_train
) ## evalue the model once to set nonzero initial state
290 w
, w_name
=get_initial_weights(model_fit
, params
, rnn_dat
)
292 model_fit
.set_weights(w
)
295 model_fit
.fit(x_train
, y_train
+ centering
[1] , epochs
=params
['epochs'],batch_size
=samples
, verbose
=params
['verbose_fit'])
296 w_fitted
=model_fit
.get_weights()
297 if params
['verbose_weights']:
298 for i
in range(len(w_fitted
)):
299 print('weight',i
,w_name
[i
],'shape',w
[i
].shape
,'ndim',w
[i
].ndim
,
300 'fitted: sum',np
.sum(w_fitted
[i
],axis
=0),'\nentries',w_fitted
[i
])
302 print('Fitting skipped, using initial weights')
305 model_predict
.set_weights(w_fitted
)
309 from tensorflow
.keras
.callbacks
import Callback
311 class ResetStatesCallback(Callback
):
312 def on_epoch_end(self
, epoch
, logs
=None):
313 self
.model
.reset_states()
316 def train_rnn(rnn_dat
, params
,hours
, fit
=True, callbacks
=[]):
318 verbose
= params
['verbose']
321 hours
= get_item(rnn_dat
,'hours')
323 samples
= get_item(rnn_dat
,'samples')
324 features
= get_item(rnn_dat
,'features')
325 timesteps
= get_item(rnn_dat
,'timesteps')
326 centering
= get_item(params
,'centering')
327 training
= get_item(params
,'training')
328 batch_size
= get_item(params
,'batch_size')
330 single_batch
= batch_size
is None or batch_size
is np
.inf
333 print('replacing batch_size =',batch_size
)
335 epochs
=get_item(params
,'epochs')
337 model_fit
=create_RNN_2(hidden_units
=params
['hidden_units'],
338 dense_units
=params
['dense_units'],
339 batch_shape
=(batch_size
,timesteps
,features
),
341 return_sequences
=False,
343 activation
=params
['activation'],
344 dense_layers
=params
['dense_layers'],
348 model_predict
=create_RNN_2(hidden_units
=params
['hidden_units'],
349 dense_units
=params
['dense_units'],
350 input_shape
=(hours
,features
),stateful
= False,
351 return_sequences
=True,
352 activation
=params
['activation'],
353 dense_layers
=params
['dense_layers'],
356 if verbose
: print(model_predict
.summary())
358 x_train
= rnn_dat
['x_train']
359 y_train
= rnn_dat
['y_train']
361 print('x_train shape =',x_train
.shape
)
362 print('y_train shape =',y_train
.shape
)
363 print('x_train hash =',hash2(x_train
))
364 print('y_train hash =',hash2(y_train
))
367 model_fit(x_train
) ## evalue the model once to set nonzero initial state for compatibility
368 w
, w_name
=get_initial_weights(model_fit
, params
, rnn_dat
)
369 print('initial weights hash =',hash2(w
))
372 model_fit
.set_weights(w
)
374 model_fit
.fit(x_train
,
375 y_train
+ centering
[1] ,
378 callbacks
= callbacks
,
379 verbose
=get_item(params
,'verbose_fit')
383 print('training by batches')
384 batches
= samples
// batch_size
385 for i
in range(epochs
):
386 for j
in range(batches
):
387 batch
= slice(j
*batch_size
, (j
+1)*batch_size
)
388 model_fit
.fit(x_train
[batch
],
389 y_train
[batch
] + centering
[1] ,
391 callbacks
= callbacks
,
392 verbose
=params
['verbose_fit'])
393 model_fit
.reset_states()
394 print('epoch',i
,'batch j','done')
395 print('epoch',i
,'done')
397 print('training by epoch, resetting state after each epoch')
398 for i
in range(epochs
):
399 model_fit
.fit(x_train
,
400 y_train
+ centering
[1] ,
403 callbacks
= callbacks
,
404 verbose
=params
['verbose_fit'])
405 model_fit
.reset_states()
406 print('epoch',i
,'done')
408 print('training by epoch, not resetting state after each epoch')
409 for i
in range(epochs
):
410 model_fit
.fit(x_train
,
411 y_train
+ centering
[1] ,
414 callbacks
= callbacks
,
415 verbose
=params
['verbose_fit'])
416 print('epoch',i
,'done')
418 print('training in one pass, not resetting state after each epoch')
419 model_fit
.fit(x_train
,
420 y_train
+ centering
[1] ,
422 batch_size
=batch_size
,
423 callbacks
= callbacks
,
424 verbose
=params
['verbose_fit'])
427 print('training in one pass, resetting state after each epoch by callback')
428 model_fit
.fit(x_train
,
429 y_train
+ centering
[1] ,
431 batch_size
=batch_size
,
432 callbacks
= callbacks
+ [ResetStatesCallback()],
433 verbose
=params
['verbose_fit'])
435 w_fitted
=model_fit
.get_weights()
436 print('fitted weights hash =',hash2(w_fitted
))
437 if params
['verbose_weights']:
438 for i
in range(len(w_fitted
)):
439 print('weight',i
,w_name
[i
],'shape',w
[i
].shape
,'ndim',w
[i
].ndim
,
440 'fitted: sum',np
.sum(w_fitted
[i
],axis
=0),'\nentries',w_fitted
[i
])
442 print('Fitting skipped, using initial weights')
445 model_predict
.set_weights(w_fitted
)
449 def get_initial_weights(model_fit
,params
,rnn_dat
):
450 # Given a RNN architecture and hyperparameter dictionary, return array of physics-initiated weights
452 # model_fit: output of create_RNN_2 with no training
453 # params: (dict) dictionary of hyperparameters
454 # rnn_dat: (dict) data dictionary, output of create_rnn_dat
455 # Returns: numpy ndarray of weights that should be a rough solution to the moisture ODE
456 DeltaE
= params
['DeltaE']
458 fmr
= params
['fm_raise_vs_rain']
459 centering
= params
['centering'] # shift activation down
461 w0_initial
={'Ed':(1.-np
.exp(-T1
))/2,
462 'Ew':(1.-np
.exp(-T1
))/2,
463 'rain':fmr
* rnn_dat
['scale_fm']/rnn_dat
['scale_rain']} # wx - input feature
464 # wh wb wd bd = bias -1
466 w_initial
=np
.array([np
.nan
, np
.exp(-0.1), DeltaE
[0]/rnn_dat
['scale_fm'], # layer 0
467 1.0, -centering
[0] + DeltaE
[1]/rnn_dat
['scale_fm']]) # layer 1
468 if params
['verbose_weights']:
469 print('Equilibrium moisture correction bias',DeltaE
[0],
470 'in the hidden layer and',DeltaE
[1],' in the output layer')
472 w_name
= ['wx','wh','bh','wd','bd']
474 w
=model_fit
.get_weights()
475 for j
in range(w
[0].shape
[0]):
476 feature
= rnn_dat
['features_list'][j
]
477 for k
in range(w
[0].shape
[1]):
478 w
[0][j
][k
]=w0_initial
[feature
]
479 for i
in range(1,len(w
)): # number of the weight
480 for j
in range(w
[i
].shape
[0]): # number of the inputs
482 # initialize all entries of the weight matrix to the same number
483 for k
in range(w
[i
].shape
[1]):
484 w
[i
][j
][k
]=w_initial
[i
]/w
[i
].shape
[0]
488 print('weight',i
,'shape',w
[i
].shape
)
489 raise ValueError("Only 1 or 2 dimensions supported")
490 if params
['verbose_weights']:
491 print('weight',i
,w_name
[i
],'shape',w
[i
].shape
,'ndim',w
[i
].ndim
,
492 'initial: sum',np
.sum(w
[i
],axis
=0),'\nentries',w
[i
])
496 def rnn_predict(model
, params
, rnn_dat
):
497 # Given compiled model, hyperparameters, and data dictionary, return array of predicted values.
498 # NOTE: this does not distinguish in-sample vs out-of-sample data, it just fits the model to the given data
500 # model: compiled moisture model
501 # params: (dict) dictionary of hyperparameters
502 # rnn_dat: (dict) dictionary of data to fit model to, output of create_rnn_dat
503 # Returns: numpy array of fitted moisture values
504 verbose
= params
['verbose']
505 centering
= params
['centering']
506 features
= rnn_dat
['features']
507 hours
= rnn_dat
['hours']
509 # model.set_weights(w_fitted)
510 x_input
=np
.reshape(rnn_dat
['Et'],(1, hours
, features
))
511 y_output
= model
.predict(x_input
, verbose
= verbose
) - centering
[1]
513 vprint('x_input.shape=',x_input
.shape
,'y_output.shape=',y_output
.shape
)
515 m
= np
.reshape(y_output
,hours
)
516 # print('weights=',w)
519 m
= m
*rnn_dat
['scale_fm']
520 m
= np
.reshape(m
,hours
)
523 def run_rnn(case_data
,params
,fit
=True,title2
=''):
524 # Run RNN on given case dictionary of FMDA data with certain params. Used internally in run_case
526 # case_data: (dict) collection of cases with FMDA data. Cases are different locations and times, or synthetic data
527 # params: (dict) collection of run options for model
528 # fit: (bool) whether or not to fit RNN to data
529 # title2: (str) title of RNN run to be joined to string with other info for plotting title
530 verbose
= params
['verbose']
532 reproducibility
.set_seed() # Set seed for reproducibility
533 rnn_dat
= create_rnn_data(case_data
,params
)
534 if params
['verbose']:
535 check_data(rnn_dat
,case
=0,name
='rnn_dat')
536 model_predict
= train_rnn(
543 m
= rnn_predict(model_predict
, params
, rnn_dat
)
546 hv
= hash2(model_predict
.get_weights())
547 if case_data
['case']=='case11' and fit
:
548 hv5
= 5.55077327554663e+19
549 print('check 5:',hv
, 'should be',hv5
,'error',hv
-hv5
)
551 checkm
= case_data
['m'][350]
552 mv
= 3.77920889854431152
553 print('checkm=',format(checkm
, '.17f'),' error',checkm
-mv
)
555 print('check - hash weights:',hv
)
557 plot_data(case_data
,title2
=title2
)
559 return m
, rmse_data(case_data
)
562 def run_case(case_data
,params
, check_data
=False):
563 # Given a formatted FMDA dictionary of data, run RNN model using given run params
566 # params: (dict) collection of run options for model
567 # Return: (dict) collection of RMSE error for RNN & EKF, for train and prediction periods
568 print('\n***** ',case_data
['case'],' *****\n')
569 case_data
['rain'][np
.isnan(case_data
['rain'])] = 0
571 check_data(case_data
)
572 hours
=case_data
['hours']
573 if 'train_frac' in params
:
574 case_data
['h2'] = round(hours
* params
['train_frac'])
576 plot_data(case_data
,title2
='case data on input')
577 m
,Ec
= mod
.run_augmented_kf(case_data
) # extract from state
580 plot_data(case_data
,title2
='augmented KF')
581 rmse
= {'Augmented KF':rmse_data(case_data
)}
582 del case_data
['Ec'] # cleanup
583 rmse
.update({'RNN initial':run_rnn(case_data
,params
,fit
=False,title2
='with initial weights, no fit')[1]})
584 rmse
.update({'RNN trained':run_rnn(case_data
,params
,fit
=True,title2
='with trained RNN')[1]})