fixing print
[notebooks.git] / fmda / moisture_rnn.py
blobf1f6361b8fcfb63d596adf85b26fd2a3ce80b6c9
1 import numpy as np
2 import tensorflow as tf
3 from keras.models import Sequential
4 from keras.layers import Dense, SimpleRNN
5 # from keras.utils.vis_utils import plot_model
6 from keras.utils import plot_model
7 from sklearn.preprocessing import MinMaxScaler
8 from sklearn.metrics import mean_squared_error
9 import math
10 import matplotlib.pyplot as plt
11 import tensorflow as tf
12 import keras.backend as K
13 import tensorflow as tf
14 import pandas as pd
15 from utils import vprint, hash2, get_item
16 import reproducibility
17 from data_funcs import check_data, rmse_data, plot_data
18 import moisture_models as mod
19 import sys
22 def staircase(x,y,timesteps,datapoints,return_sequences=False, verbose = False):
23 # x [datapoints,features] all inputs
24 # y [datapoints,outputs]
25 # timesteps: split x and y into samples length timesteps, shifted by 1
26 # datapoints: number of timesteps to use for training, no more than y.shape[0]
27 vprint('staircase: shape x = ',x.shape)
28 vprint('staircase: shape y = ',y.shape)
29 vprint('staircase: timesteps=',timesteps)
30 vprint('staircase: datapoints=',datapoints)
31 vprint('staircase: return_sequences=',return_sequences)
32 outputs = y.shape[1]
33 features = x.shape[1]
34 samples = datapoints-timesteps+1
35 vprint('staircase: samples=',samples,'timesteps=',timesteps,'features=',features)
36 x_train = np.empty([samples, timesteps, features])
37 if return_sequences:
38 vprint('returning all timesteps in a sample')
39 y_train = np.empty([samples, timesteps, outputs]) # all
40 for i in range(samples):
41 for k in range(timesteps):
42 x_train[i,k,:] = x[i+k,:]
43 y_train[i,k,:] = y[i+k,:]
44 else:
45 vprint('returning only the last timestep in a sample')
46 y_train = np.empty([samples, outputs])
47 for i in range(samples):
48 for k in range(timesteps):
49 x_train[i,k,:] = x[i+k,:]
50 y_train[i,:] = y[i+timesteps-1,:]
52 return x_train, y_train
54 def staircase_2(x,y,timesteps,batch_size=None,trainsteps=np.inf,return_sequences=False, verbose = True):
55 # create RNN training data in multiple batches
56 # input:
57 # x (,features)
58 # y (,outputs)
59 # timesteps: split x and y into sequences length timesteps
60 # a.k.a. lookback or sequence_length
62 # print params if verbose
64 if batch_size is None:
65 raise ValueError('staircase_2 requires batch_size')
66 print('staircase_2: shape x = ',x.shape)
67 print('staircase_2: shape y = ',y.shape)
68 print('staircase_2: timesteps=',timesteps)
69 print('staircase_2: batch_size=',batch_size)
70 print('staircase_2: return_sequences=',return_sequences)
72 nx,features= x.shape
73 ny,outputs = y.shape
74 datapoints = min(nx,ny,trainsteps)
75 print('staircase_2: datapoints=',datapoints)
77 # sequence j in a given batch is assumed to be the continuation of sequence j in the previous batch
78 # https://www.tensorflow.org/guide/keras/working_with_rnns Cross-batch statefulness
80 # example with timesteps=3 batch_size=3 datapoints=10
81 # batch 0: [0 1 2] [1 2 3] [2 3 4]
82 # batch 1: [3 4 5] [4 5 6] [5 6 7]
83 # batch 2: [6 7 8] [7 8 9] when runs out this is the last batch, can be shorter
85 # the first sequence in batch j starts from timesteps*j and ends with timesteps*(j+1)-1
87 max_batches = datapoints // timesteps
88 max_sequences = max_batches * batch_size
90 print('staircase_2: max_batches=',max_batches)
91 print('staircase_2: max_sequences=',max_sequences)
93 x_train = np.zeros((max_sequences, timesteps, features))
94 if return_sequences:
95 y_train = np.empty((max_sequences, timesteps, outputs))
96 else:
97 y_train = np.empty((max_sequences, outputs ))
99 # build the sequences
101 for i in range(max_batches):
102 for j in range(batch_size):
103 begin = i*timesteps + j
104 next = begin + timesteps
105 if next > datapoints:
106 break
107 vprint('sequence',k,'batch',i,'sample',j,'data',begin,'to',next-1)
108 x_train[k,:,:] = x[begin:next,:]
109 if return_sequences:
110 y_train[k,:,:] = y[begin:next,:]
111 else:
112 y_train[k,:] = y[next-1,:]
113 k += 1
115 print('staircase_2: shape x_train = ',x_train.shape)
116 print('staircase_2: shape y_train = ',y_train.shape)
117 print('staircase_2: sequences generated',k)
118 print('staircase_2: batch_size=',batch_size)
119 k = (k // batch_size) * batch_size
120 print('staircase_2: removing partial and empty batches at the end, keeping',k)
121 x_train = x_train[:k,:,:]
122 if return_sequences:
123 y_train = y_train[:k,:,:]
124 else:
125 y_train = y_train[:k,:]
127 print('staircase_2: shape x_train = ',x_train.shape)
128 print('staircase_2: shape y_train = ',y_train.shape)
130 return x_train, y_train
132 def create_RNN_2(hidden_units, dense_units, activation, stateful=False,
133 batch_shape=None, input_shape=None, dense_layers=1,
134 rnn_layers=1,return_sequences=False,
135 initial_state=None, verbose = True):
136 if verbose:
137 print("Function: moisture_rnn.create_RNN_2")
138 print("Arguments:")
139 arg_dict = locals().copy()
140 for arg in arg_dict:
141 if arg != "self":
142 print(f" {arg} = {arg_dict[arg]}")
143 if stateful:
144 inputs = tf.keras.Input(batch_shape=batch_shape)
145 else:
146 inputs = tf.keras.Input(shape=input_shape)
147 # https://stackoverflow.com/questions/43448029/how-can-i-print-the-values-of-keras-tensors
148 x = inputs
149 for i in range(rnn_layers):
150 x = tf.keras.layers.SimpleRNN(hidden_units,activation=activation[0],
151 stateful=stateful,return_sequences=return_sequences)(x)
152 for i in range(dense_layers):
153 x = tf.keras.layers.Dense(dense_units, activation=activation[1])(x)
154 model = tf.keras.Model(inputs=inputs, outputs=x)
155 model.compile(loss='mean_squared_error', optimizer='adam')
156 return model
158 def create_rnn_data(dat, params, hours=None, h2=None):
159 # Given fmda data and hyperparameters, return formatted dictionary to be used in RNN
160 # Inputs:
161 # dat: (dict) fmda dictionary
162 # params: (dict) hyperparameters
163 # hours: (int) optional parameter to set total length of train+predict
164 # h2: (int) optional parameter to set as length of training period (h2 = total - hours)
165 # Returns: (dict) formatted datat used in RNN
166 timesteps = params['timesteps']
167 scale = params['scale']
168 rain_do = params['rain_do']
169 verbose = params['verbose']
170 batch_size = params['batch_size']
172 if hours is None:
173 hours = dat['hours']
174 if h2 is None:
175 h2 = dat['h2']
176 vprint('create_rnn_data: hours=',hours,' h2=',h2)
177 # extract inputs the windown of interest
178 Ew = dat['Ew']
179 Ed = dat['Ed']
180 rain = dat['rain']
181 fm = dat['fm']
182 # temp = dat['temp']
184 # Average Equilibrium
185 # E = (Ed + Ew)/2 # why?
187 # Scale Data if required
188 if scale:
189 print('scaling to range 0 to',scale)
190 scale_fm=max(max(Ew),max(Ed),max(fm))/scale
191 scale_rain=max(max(rain),0.01)/scale
192 Ed = Ed/scale_fm
193 Ew = Ew/scale_fm
194 fm = fm/scale_fm
195 rain = rain/scale_rain
196 else:
197 scale_fm=1.0
198 scale_rain=1.0
200 if params['verbose_weights']:
201 print('scale_fm=',scale_fm,'scale_rain=',scale_rain)
203 # transform as 2D, (timesteps, features) and (timesteps, outputs)
205 if rain_do:
206 Et = np.vstack((Ed, Ew, rain)).T
207 features_list = ['Ed', 'Ew', 'rain']
208 else:
209 Et = np.vstack((Ed, Ew)).T
210 features_list = ['Ed', 'Ew']
211 datat = np.reshape(fm,[fm.shape[0],1])
213 # split data
214 print('batch_size=',batch_size)
215 if batch_size is None or batch_size is np.inf:
216 x_train, y_train = staircase(Et,datat,timesteps=timesteps,datapoints=h2,
217 return_sequences=False, verbose = verbose)
218 batches = None
219 else:
220 x_train, y_train = staircase_2(Et,datat,timesteps,batch_size,trainsteps=h2,
221 return_sequences=False, verbose = verbose)
223 vprint('x_train shape=',x_train.shape)
224 vprint('y_train shape=',y_train.shape)
225 samples, timesteps, features = x_train.shape
227 h0 = tf.convert_to_tensor(datat[:samples],dtype=tf.float32)
229 # Set up return dictionary
231 rnn_dat = {
232 'hours': hours,
233 'x_train': x_train,
234 'y_train': y_train,
235 'Et': Et,
236 'samples': samples,
237 'timesteps': timesteps,
238 'features': features,
239 'h0': h0,
240 'hours':hours,
241 'h2':h2,
242 'scale':scale,
243 'scale_fm':scale_fm,
244 'scale_rain':scale_rain,
245 'rain_do':rain_do,
246 'features_list':features_list
249 return rnn_dat
252 def train_rnn_2(rnn_dat, params,hours, fit=True):
254 verbose = params['verbose']
256 if hours is None:
257 hours = rnn_dat['hours']
259 samples = rnn_dat['samples']
260 features = rnn_dat['features']
261 timesteps = rnn_dat['timesteps']
262 centering = params['centering']
264 model_fit=create_RNN_2(hidden_units=params['hidden_units'],
265 dense_units=params['dense_units'],
266 batch_shape=(samples,timesteps,features),
267 stateful=True,
268 return_sequences=False,
269 # initial_state=h0,
270 activation=params['activation'],
271 dense_layers=params['dense_layers'],
272 verbose = verbose)
274 Et = rnn_dat['Et']
275 model_predict=create_RNN_2(hidden_units=params['hidden_units'],
276 dense_units=params['dense_units'],
277 input_shape=(hours,features),stateful = False,
278 return_sequences=True,
279 activation=params['activation'],
280 dense_layers=params['dense_layers'],
281 verbose = verbose)
283 if verbose: print(model_predict.summary())
285 x_train = rnn_dat['x_train']
286 y_train = rnn_dat['y_train']
288 model_fit(x_train) ## evalue the model once to set nonzero initial state
290 w, w_name=get_initial_weights(model_fit, params, rnn_dat)
292 model_fit.set_weights(w)
294 if fit:
295 model_fit.fit(x_train, y_train + centering[1] , epochs=params['epochs'],batch_size=samples, verbose=params['verbose_fit'])
296 w_fitted=model_fit.get_weights()
297 if params['verbose_weights']:
298 for i in range(len(w_fitted)):
299 print('weight',i,w_name[i],'shape',w[i].shape,'ndim',w[i].ndim,
300 'fitted: sum',np.sum(w_fitted[i],axis=0),'\nentries',w_fitted[i])
301 else:
302 print('Fitting skipped, using initial weights')
303 w_fitted=w
305 model_predict.set_weights(w_fitted)
307 return model_predict
309 from tensorflow.keras.callbacks import Callback
311 class ResetStatesCallback(Callback):
312 def on_epoch_end(self, epoch, logs=None):
313 self.model.reset_states()
316 def train_rnn(rnn_dat, params,hours, fit=True, callbacks=[]):
318 verbose = params['verbose']
320 if hours is None:
321 hours = get_item(rnn_dat,'hours')
323 samples = get_item(rnn_dat,'samples')
324 features = get_item(rnn_dat,'features')
325 timesteps = get_item(rnn_dat,'timesteps')
326 centering = get_item(params,'centering')
327 training = get_item(params,'training')
328 batch_size = get_item(params,'batch_size')
330 single_batch = batch_size is None or batch_size is np.inf
331 if single_batch:
332 batch_size=samples
333 print('replacing batch_size =',batch_size)
335 epochs=get_item(params,'epochs')
337 model_fit=create_RNN_2(hidden_units=params['hidden_units'],
338 dense_units=params['dense_units'],
339 batch_shape=(batch_size,timesteps,features),
340 stateful=True,
341 return_sequences=False,
342 # initial_state=h0,
343 activation=params['activation'],
344 dense_layers=params['dense_layers'],
345 verbose = verbose)
347 Et = rnn_dat['Et']
348 model_predict=create_RNN_2(hidden_units=params['hidden_units'],
349 dense_units=params['dense_units'],
350 input_shape=(hours,features),stateful = False,
351 return_sequences=True,
352 activation=params['activation'],
353 dense_layers=params['dense_layers'],
354 verbose = verbose)
356 if verbose: print(model_predict.summary())
358 x_train = rnn_dat['x_train']
359 y_train = rnn_dat['y_train']
361 print('x_train shape =',x_train.shape)
362 print('y_train shape =',y_train.shape)
363 print('x_train hash =',hash2(x_train))
364 print('y_train hash =',hash2(y_train))
366 if single_batch:
367 model_fit(x_train) ## evalue the model once to set nonzero initial state for compatibility
368 w, w_name=get_initial_weights(model_fit, params, rnn_dat)
369 print('initial weights hash =',hash2(w))
371 if fit:
372 model_fit.set_weights(w)
373 if single_batch:
374 model_fit.fit(x_train,
375 y_train + centering[1] ,
376 epochs=epochs,
377 batch_size=samples,
378 callbacks = callbacks,
379 verbose=get_item(params,'verbose_fit')
381 else:
382 if training == 1:
383 print('training by batches')
384 batches = samples // batch_size
385 for i in range(epochs):
386 for j in range(batches):
387 batch = slice(j*batch_size, (j+1)*batch_size)
388 model_fit.fit(x_train[batch],
389 y_train[batch] + centering[1] ,
390 epochs=1,
391 callbacks = callbacks,
392 verbose=params['verbose_fit'])
393 model_fit.reset_states()
394 print('epoch',i,'batch j','done')
395 print('epoch',i,'done')
396 elif training==2:
397 print('training by epoch, resetting state after each epoch')
398 for i in range(epochs):
399 model_fit.fit(x_train,
400 y_train + centering[1] ,
401 epochs=1,
402 batch_size=samples,
403 callbacks = callbacks,
404 verbose=params['verbose_fit'])
405 model_fit.reset_states()
406 print('epoch',i,'done')
407 elif training==3:
408 print('training by epoch, not resetting state after each epoch')
409 for i in range(epochs):
410 model_fit.fit(x_train,
411 y_train + centering[1] ,
412 epochs=1,
413 batch_size=samples,
414 callbacks = callbacks,
415 verbose=params['verbose_fit'])
416 print('epoch',i,'done')
417 elif training==4:
418 print('training in one pass, not resetting state after each epoch')
419 model_fit.fit(x_train,
420 y_train + centering[1] ,
421 epochs=epochs,
422 batch_size=batch_size,
423 callbacks = callbacks,
424 verbose=params['verbose_fit'])
426 elif training==5:
427 print('training in one pass, resetting state after each epoch by callback')
428 model_fit.fit(x_train,
429 y_train + centering[1] ,
430 epochs=epochs,
431 batch_size=batch_size,
432 callbacks = callbacks + [ResetStatesCallback()],
433 verbose=params['verbose_fit'])
435 w_fitted=model_fit.get_weights()
436 print('fitted weights hash =',hash2(w_fitted))
437 if params['verbose_weights']:
438 for i in range(len(w_fitted)):
439 print('weight',i,w_name[i],'shape',w[i].shape,'ndim',w[i].ndim,
440 'fitted: sum',np.sum(w_fitted[i],axis=0),'\nentries',w_fitted[i])
441 else:
442 print('Fitting skipped, using initial weights')
443 w_fitted=w
445 model_predict.set_weights(w_fitted)
447 return model_predict
449 def get_initial_weights(model_fit,params,rnn_dat):
450 # Given a RNN architecture and hyperparameter dictionary, return array of physics-initiated weights
451 # Inputs:
452 # model_fit: output of create_RNN_2 with no training
453 # params: (dict) dictionary of hyperparameters
454 # rnn_dat: (dict) data dictionary, output of create_rnn_dat
455 # Returns: numpy ndarray of weights that should be a rough solution to the moisture ODE
456 DeltaE = params['DeltaE']
457 T1 = params['T1']
458 fmr = params['fm_raise_vs_rain']
459 centering = params['centering'] # shift activation down
461 w0_initial={'Ed':(1.-np.exp(-T1))/2,
462 'Ew':(1.-np.exp(-T1))/2,
463 'rain':fmr * rnn_dat['scale_fm']/rnn_dat['scale_rain']} # wx - input feature
464 # wh wb wd bd = bias -1
466 w_initial=np.array([np.nan, np.exp(-0.1), DeltaE[0]/rnn_dat['scale_fm'], # layer 0
467 1.0, -centering[0] + DeltaE[1]/rnn_dat['scale_fm']]) # layer 1
468 if params['verbose_weights']:
469 print('Equilibrium moisture correction bias',DeltaE[0],
470 'in the hidden layer and',DeltaE[1],' in the output layer')
472 w_name = ['wx','wh','bh','wd','bd']
474 w=model_fit.get_weights()
475 for j in range(w[0].shape[0]):
476 feature = rnn_dat['features_list'][j]
477 for k in range(w[0].shape[1]):
478 w[0][j][k]=w0_initial[feature]
479 for i in range(1,len(w)): # number of the weight
480 for j in range(w[i].shape[0]): # number of the inputs
481 if w[i].ndim==2:
482 # initialize all entries of the weight matrix to the same number
483 for k in range(w[i].shape[1]):
484 w[i][j][k]=w_initial[i]/w[i].shape[0]
485 elif w[i].ndim==1:
486 w[i][j]=w_initial[i]
487 else:
488 print('weight',i,'shape',w[i].shape)
489 raise ValueError("Only 1 or 2 dimensions supported")
490 if params['verbose_weights']:
491 print('weight',i,w_name[i],'shape',w[i].shape,'ndim',w[i].ndim,
492 'initial: sum',np.sum(w[i],axis=0),'\nentries',w[i])
494 return w, w_name
496 def rnn_predict(model, params, rnn_dat):
497 # Given compiled model, hyperparameters, and data dictionary, return array of predicted values.
498 # NOTE: this does not distinguish in-sample vs out-of-sample data, it just fits the model to the given data
499 # Inputs:
500 # model: compiled moisture model
501 # params: (dict) dictionary of hyperparameters
502 # rnn_dat: (dict) dictionary of data to fit model to, output of create_rnn_dat
503 # Returns: numpy array of fitted moisture values
504 verbose = params['verbose']
505 centering = params['centering']
506 features = rnn_dat['features']
507 hours = rnn_dat['hours']
509 # model.set_weights(w_fitted)
510 x_input=np.reshape(rnn_dat['Et'],(1, hours, features))
511 y_output = model.predict(x_input, verbose = verbose) - centering[1]
513 vprint('x_input.shape=',x_input.shape,'y_output.shape=',y_output.shape)
515 m = np.reshape(y_output,hours)
516 # print('weights=',w)
517 if rnn_dat['scale']:
518 vprint('scaling')
519 m = m*rnn_dat['scale_fm']
520 m = np.reshape(m,hours)
521 return m
523 def run_rnn(case_data,params,fit=True,title2=''):
524 # Run RNN on given case dictionary of FMDA data with certain params. Used internally in run_case
525 # Inputs:
526 # case_data: (dict) collection of cases with FMDA data. Cases are different locations and times, or synthetic data
527 # params: (dict) collection of run options for model
528 # fit: (bool) whether or not to fit RNN to data
529 # title2: (str) title of RNN run to be joined to string with other info for plotting title
530 verbose = params['verbose']
532 reproducibility.set_seed() # Set seed for reproducibility
533 rnn_dat = create_rnn_data(case_data,params)
534 if params['verbose']:
535 check_data(rnn_dat,case=0,name='rnn_dat')
536 model_predict = train_rnn(
537 rnn_dat,
538 params,
539 rnn_dat['hours'],
540 fit=fit
543 m = rnn_predict(model_predict, params, rnn_dat)
544 case_data['m'] = m
546 hv = hash2(model_predict.get_weights())
547 if case_data['case']=='case11' and fit:
548 hv5 = 5.55077327554663e+19
549 print('check 5:',hv, 'should be',hv5,'error',hv-hv5)
550 # assert (hv == hv5)
551 checkm = case_data['m'][350]
552 mv = 3.77920889854431152
553 print('checkm=',format(checkm, '.17f'),' error',checkm-mv)
554 else:
555 print('check - hash weights:',hv)
557 plot_data(case_data,title2=title2)
558 plt.show()
559 return m, rmse_data(case_data)
562 def run_case(case_data,params, check_data=False):
563 # Given a formatted FMDA dictionary of data, run RNN model using given run params
564 # Inputs:
565 # case_data: (dict)
566 # params: (dict) collection of run options for model
567 # Return: (dict) collection of RMSE error for RNN & EKF, for train and prediction periods
568 print('\n***** ',case_data['case'],' *****\n')
569 case_data['rain'][np.isnan(case_data['rain'])] = 0
570 if check_data:
571 check_data(case_data)
572 hours=case_data['hours']
573 if 'train_frac' in params:
574 case_data['h2'] = round(hours * params['train_frac'])
575 h2=case_data['h2']
576 plot_data(case_data,title2='case data on input')
577 m,Ec = mod.run_augmented_kf(case_data) # extract from state
578 case_data['m']=m
579 case_data['Ec']=Ec
580 plot_data(case_data,title2='augmented KF')
581 rmse = {'Augmented KF':rmse_data(case_data)}
582 del case_data['Ec'] # cleanup
583 rmse.update({'RNN initial':run_rnn(case_data,params,fit=False,title2='with initial weights, no fit')[1]})
584 rmse.update({'RNN trained':run_rnn(case_data,params,fit=True,title2='with trained RNN')[1]})
585 return rmse