fmda/moisture_rnn.py

   1 import numpy as np
   2 import tensorflow as tf
   3 from keras.models import Sequential
   4 from keras.layers import Dense, SimpleRNN
   5 # from keras.utils.vis_utils import plot_model
   6 from keras.utils import plot_model
   7 from sklearn.preprocessing import MinMaxScaler
   8 from sklearn.metrics import mean_squared_error
   9 import math
  10 import matplotlib.pyplot as plt
  11 import tensorflow as tf
  12 import keras.backend as K
  13 import tensorflow as tf
  14 import pandas as pd
  15 from utils import vprint, hash2, get_item
  16 import reproducibility
  17 from data_funcs import check_data, rmse_data, plot_data
  18 import moisture_models as mod
  19 import sys
  20
  21
  22 def staircase(x,y,timesteps,datapoints,return_sequences=False, verbose = False):
  23     # x [datapoints,features]    all inputs
  24     # y [datapoints,outputs]
  25     # timesteps: split x and y into samples length timesteps, shifted by 1
  26     # datapoints: number of timesteps to use for training, no more than y.shape[0]
  27     vprint('staircase: shape x = ',x.shape)
  28     vprint('staircase: shape y = ',y.shape)
  29     vprint('staircase: timesteps=',timesteps)
  30     vprint('staircase: datapoints=',datapoints)
  31     vprint('staircase: return_sequences=',return_sequences)
  32     outputs = y.shape[1]
  33     features = x.shape[1]
  34     samples = datapoints-timesteps+1
  35     vprint('staircase: samples=',samples,'timesteps=',timesteps,'features=',features)
  36     x_train = np.empty([samples, timesteps, features])
  37     if return_sequences:
  38         vprint('returning all timesteps in a sample')
  39         y_train = np.empty([samples, timesteps, outputs])  # all
  40         for i in range(samples):
  41             for k in range(timesteps):
  42                 x_train[i,k,:] = x[i+k,:]
  43                 y_train[i,k,:] = y[i+k,:]
  44     else:
  45         vprint('returning only the last timestep in a sample')
  46         y_train = np.empty([samples, outputs])
  47         for i in range(samples):
  48             for k in range(timesteps):
  49                 x_train[i,k,:] = x[i+k,:]
  50             y_train[i,:] = y[i+timesteps-1,:]
  51
  52     return x_train, y_train
  53
  54 def staircase_2(x,y,timesteps,batch_size=None,trainsteps=np.inf,return_sequences=False, verbose = True):
  55     # create RNN training data in multiple batches
  56     # input:
  57     #     x (,features)
  58     #     y (,outputs)
  59     #     timesteps: split x and y into sequences length timesteps
  60     #                a.k.a. lookback or sequence_length
  61
  62     # print params if verbose
  63
  64     if batch_size is None:
  65         raise ValueError('staircase_2 requires batch_size')
  66     print('staircase_2: shape x = ',x.shape)
  67     print('staircase_2: shape y = ',y.shape)
  68     print('staircase_2: timesteps=',timesteps)
  69     print('staircase_2: batch_size=',batch_size)
  70     print('staircase_2: return_sequences=',return_sequences)
  71
  72     nx,features= x.shape
  73     ny,outputs = y.shape
  74     datapoints = min(nx,ny,trainsteps)
  75     print('staircase_2: datapoints=',datapoints)
  76
  77     # sequence j in a given batch is assumed to be the continuation of sequence j in the previous batch
  78     # https://www.tensorflow.org/guide/keras/working_with_rnns Cross-batch statefulness
  79
  80     # example with timesteps=3 batch_size=3 datapoints=10
  81     #     batch 0: [0 1 2]   [1 2 3]   [2 3 4]
  82     #     batch 1: [3 4 5]   [4 5 6]   [5 6 7]
  83     #     batch 2: [6 7 8]   [7 8 9]    when runs out this is the last batch, can be shorter
  84     #
  85     #     the first sequence in batch j starts from timesteps*j and ends with timesteps*(j+1)-1
  86
  87     max_batches = datapoints // timesteps
  88     max_sequences = max_batches * batch_size
  89
  90     print('staircase_2: max_batches=',max_batches)
  91     print('staircase_2: max_sequences=',max_sequences)
  92
  93     x_train = np.zeros((max_sequences, timesteps, features))
  94     if return_sequences:
  95         y_train = np.empty((max_sequences, timesteps, outputs))
  96     else:
  97         y_train = np.empty((max_sequences, outputs ))
  98
  99     # build the sequences
 100     k=0
 101     for i in range(max_batches):
 102         for j in range(batch_size):
 103             begin = i*timesteps + j
 104             next  = begin + timesteps
 105             if next > datapoints:
 106                 break
 107             vprint('sequence',k,'batch',i,'sample',j,'data',begin,'to',next-1)
 108             x_train[k,:,:] = x[begin:next,:]
 109             if return_sequences:
 110                  y_train[k,:,:] = y[begin:next,:]
 111             else:
 112                  y_train[k,:] = y[next-1,:]
 113             k += 1
 114
 115     print('staircase_2: shape x_train = ',x_train.shape)
 116     print('staircase_2: shape y_train = ',y_train.shape)
 117     print('staircase_2: sequences generated',k)
 118     print('staircase_2: batch_size=',batch_size)
 119     k = (k // batch_size) * batch_size
 120     print('staircase_2: removing partial and empty batches at the end, keeping',k)
 121     x_train = x_train[:k,:,:]
 122     if return_sequences:
 123          y_train = y_train[:k,:,:]
 124     else:
 125          y_train = y_train[:k,:]
 126
 127     print('staircase_2: shape x_train = ',x_train.shape)
 128     print('staircase_2: shape y_train = ',y_train.shape)
 129
 130     return x_train, y_train
 131
 132 def create_RNN_2(hidden_units, dense_units, activation, stateful=False,
 133                  batch_shape=None, input_shape=None, dense_layers=1,
 134                  rnn_layers=1,return_sequences=False,
 135                  initial_state=None, verbose = True):
 136     if verbose:
 137         print("Function: moisture_rnn.create_RNN_2")
 138         print("Arguments:")
 139         arg_dict = locals().copy()
 140         for arg in arg_dict:
 141             if arg != "self":
 142                 print(f"  {arg} = {arg_dict[arg]}")
 143     if stateful:
 144         inputs = tf.keras.Input(batch_shape=batch_shape)
 145     else:
 146         inputs = tf.keras.Input(shape=input_shape)
 147     # https://stackoverflow.com/questions/43448029/how-can-i-print-the-values-of-keras-tensors
 148     x = inputs
 149     for i in range(rnn_layers):
 150         x = tf.keras.layers.SimpleRNN(hidden_units,activation=activation[0],
 151               stateful=stateful,return_sequences=return_sequences)(x)
 152     for i in range(dense_layers):
 153         x = tf.keras.layers.Dense(dense_units, activation=activation[1])(x)
 154     model = tf.keras.Model(inputs=inputs, outputs=x)
 155     model.compile(loss='mean_squared_error', optimizer='adam')
 156     return model
 157
 158 def create_rnn_data(dat, params, hours=None, h2=None):
 159     # Given fmda data and hyperparameters, return formatted dictionary to be used in RNN
 160     # Inputs:
 161     # dat: (dict) fmda dictionary
 162     # params: (dict) hyperparameters
 163     # hours: (int) optional parameter to set total length of train+predict
 164     # h2: (int) optional parameter to set as length of training period (h2 = total - hours)
 165     # Returns: (dict) formatted datat used in RNN
 166     timesteps = params['timesteps']
 167     scale = params['scale']
 168     rain_do = params['rain_do']
 169     verbose = params['verbose']
 170     batch_size = params['batch_size']
 171
 172     if hours is None:
 173         hours = dat['hours']
 174     if h2 is None:
 175         h2 = dat['h2']
 176     vprint('create_rnn_data: hours=',hours,' h2=',h2)
 177     # extract inputs the windown of interest
 178     Ew = dat['Ew']
 179     Ed = dat['Ed']
 180     rain = dat['rain']
 181     fm = dat['fm']
 182     # temp = dat['temp']
 183
 184     # Average Equilibrium
 185     # E = (Ed + Ew)/2         # why?
 186
 187     # Scale Data if required
 188     if scale:
 189         print('scaling to range 0 to',scale)
 190         scale_fm=max(max(Ew),max(Ed),max(fm))/scale
 191         scale_rain=max(max(rain),0.01)/scale
 192         Ed = Ed/scale_fm
 193         Ew = Ew/scale_fm
 194         fm = fm/scale_fm
 195         rain = rain/scale_rain
 196     else:
 197         scale_fm=1.0
 198         scale_rain=1.0
 199
 200     if params['verbose_weights']:
 201         print('scale_fm=',scale_fm,'scale_rain=',scale_rain)
 202
 203     # transform as 2D, (timesteps, features) and (timesteps, outputs)
 204
 205     if rain_do:
 206         Et = np.vstack((Ed, Ew, rain)).T
 207         features_list = ['Ed', 'Ew', 'rain']
 208     else:
 209         Et = np.vstack((Ed, Ew)).T
 210         features_list = ['Ed', 'Ew']
 211     datat = np.reshape(fm,[fm.shape[0],1])
 212
 213     # split data
 214     print('batch_size=',batch_size)
 215     if batch_size is None or batch_size is np.inf:
 216         x_train, y_train = staircase(Et,datat,timesteps=timesteps,datapoints=h2,
 217                                  return_sequences=False, verbose = verbose)
 218         batches = None
 219     else:
 220         x_train, y_train = staircase_2(Et,datat,timesteps,batch_size,trainsteps=h2,
 221                                  return_sequences=False, verbose = verbose)
 222
 223     vprint('x_train shape=',x_train.shape)
 224     vprint('y_train shape=',y_train.shape)
 225     samples, timesteps, features = x_train.shape
 226
 227     h0 = tf.convert_to_tensor(datat[:samples],dtype=tf.float32)
 228
 229     # Set up return dictionary
 230
 231     rnn_dat = {
 232         'hours': hours,
 233         'x_train': x_train,
 234         'y_train': y_train,
 235         'Et': Et,
 236         'samples': samples,
 237         'timesteps': timesteps,
 238         'features': features,
 239         'h0': h0,
 240         'hours':hours,
 241         'h2':h2,
 242         'scale':scale,
 243         'scale_fm':scale_fm,
 244         'scale_rain':scale_rain,
 245         'rain_do':rain_do,
 246         'features_list':features_list
 247     }
 248
 249     return rnn_dat
 250
 251
 252 def train_rnn_2(rnn_dat, params,hours, fit=True):
 253
 254     verbose = params['verbose']
 255
 256     if hours is None:
 257         hours = rnn_dat['hours']
 258
 259     samples = rnn_dat['samples']
 260     features = rnn_dat['features']
 261     timesteps = rnn_dat['timesteps']
 262     centering = params['centering']
 263
 264     model_fit=create_RNN_2(hidden_units=params['hidden_units'],
 265                         dense_units=params['dense_units'],
 266                         batch_shape=(samples,timesteps,features),
 267                         stateful=True,
 268                         return_sequences=False,
 269                         # initial_state=h0,
 270                         activation=params['activation'],
 271                         dense_layers=params['dense_layers'],
 272                         verbose = verbose)
 273
 274     Et = rnn_dat['Et']
 275     model_predict=create_RNN_2(hidden_units=params['hidden_units'],
 276                         dense_units=params['dense_units'],
 277                         input_shape=(hours,features),stateful = False,
 278                         return_sequences=True,
 279                         activation=params['activation'],
 280                         dense_layers=params['dense_layers'],
 281                         verbose = verbose)
 282
 283     if verbose: print(model_predict.summary())
 284
 285     x_train = rnn_dat['x_train']
 286     y_train = rnn_dat['y_train']
 287
 288     model_fit(x_train) ## evalue the model once to set nonzero initial state
 289
 290     w, w_name=get_initial_weights(model_fit, params, rnn_dat)
 291
 292     model_fit.set_weights(w)
 293
 294     if fit:
 295         model_fit.fit(x_train, y_train + centering[1] , epochs=params['epochs'],batch_size=samples, verbose=params['verbose_fit'])
 296         w_fitted=model_fit.get_weights()
 297         if params['verbose_weights']:
 298             for i in range(len(w_fitted)):
 299                 print('weight',i,w_name[i],'shape',w[i].shape,'ndim',w[i].ndim,
 300                   'fitted: sum',np.sum(w_fitted[i],axis=0),'\nentries',w_fitted[i])
 301     else:
 302         print('Fitting skipped, using initial weights')
 303         w_fitted=w
 304
 305     model_predict.set_weights(w_fitted)
 306
 307     return model_predict
 308
 309 from tensorflow.keras.callbacks import Callback
 310
 311 class ResetStatesCallback(Callback):
 312     def on_epoch_end(self, epoch, logs=None):
 313         self.model.reset_states()
 314
 315
 316 def train_rnn(rnn_dat, params,hours, fit=True, callbacks=[]):
 317
 318     verbose = params['verbose']
 319
 320     if hours is None:
 321         hours = get_item(rnn_dat,'hours')
 322
 323     samples = get_item(rnn_dat,'samples')
 324     features = get_item(rnn_dat,'features')
 325     timesteps = get_item(rnn_dat,'timesteps')
 326     centering = get_item(params,'centering')
 327     training = get_item(params,'training')
 328     batch_size = get_item(params,'batch_size')
 329
 330     single_batch = batch_size is None or batch_size is np.inf
 331     if single_batch:
 332         batch_size=samples
 333         print('replacing batch_size =',batch_size)
 334
 335     epochs=get_item(params,'epochs')
 336
 337     model_fit=create_RNN_2(hidden_units=params['hidden_units'],
 338                         dense_units=params['dense_units'],
 339                         batch_shape=(batch_size,timesteps,features),
 340                         stateful=True,
 341                         return_sequences=False,
 342                         # initial_state=h0,
 343                         activation=params['activation'],
 344                         dense_layers=params['dense_layers'],
 345                         verbose = verbose)
 346
 347     Et = rnn_dat['Et']
 348     model_predict=create_RNN_2(hidden_units=params['hidden_units'],
 349                         dense_units=params['dense_units'],
 350                         input_shape=(hours,features),stateful = False,
 351                         return_sequences=True,
 352                         activation=params['activation'],
 353                         dense_layers=params['dense_layers'],
 354                         verbose = verbose)
 355
 356     if verbose: print(model_predict.summary())
 357
 358     x_train = rnn_dat['x_train']
 359     y_train = rnn_dat['y_train']
 360
 361     print('x_train shape =',x_train.shape)
 362     print('y_train shape =',y_train.shape)
 363     print('x_train hash =',hash2(x_train))
 364     print('y_train hash =',hash2(y_train))
 365
 366     if single_batch:
 367          model_fit(x_train) ## evalue the model once to set nonzero initial state for compatibility
 368     w, w_name=get_initial_weights(model_fit, params, rnn_dat)
 369     print('initial weights hash =',hash2(w))
 370
 371     if fit:
 372         model_fit.set_weights(w)
 373         if single_batch:
 374             model_fit.fit(x_train,
 375                       y_train + centering[1] ,
 376                       epochs=epochs,
 377                       batch_size=samples,
 378                       callbacks = callbacks,
 379                       verbose=get_item(params,'verbose_fit')
 380                       )
 381         else:
 382             if training == 1:
 383                 print('training by batches')
 384                 batches = samples // batch_size
 385                 for i in range(epochs):
 386                     for j in range(batches):
 387                         batch = slice(j*batch_size, (j+1)*batch_size)
 388                         model_fit.fit(x_train[batch],
 389                               y_train[batch] + centering[1] ,
 390                               epochs=1,
 391                               callbacks = callbacks,
 392                               verbose=params['verbose_fit'])
 393                         model_fit.reset_states()
 394                         print('epoch',i,'batch j','done')
 395                     print('epoch',i,'done')
 396             elif training==2:
 397                 print('training by epoch, resetting state after each epoch')
 398                 for i in range(epochs):
 399                     model_fit.fit(x_train,
 400                         y_train + centering[1] ,
 401                         epochs=1,
 402                         batch_size=samples,
 403                         callbacks = callbacks,
 404                         verbose=params['verbose_fit'])
 405                     model_fit.reset_states()
 406                     print('epoch',i,'done')
 407             elif training==3:
 408                 print('training by epoch, not resetting state after each epoch')
 409                 for i in range(epochs):
 410                     model_fit.fit(x_train,
 411                         y_train + centering[1] ,
 412                         epochs=1,
 413                         batch_size=samples,
 414                         callbacks = callbacks,
 415                         verbose=params['verbose_fit'])
 416                     print('epoch',i,'done')
 417             elif training==4:
 418                 print('training in one pass, not resetting state after each epoch')
 419                 model_fit.fit(x_train,
 420                     y_train + centering[1] ,
 421                     epochs=epochs,
 422                     batch_size=batch_size,
 423                     callbacks = callbacks,
 424                     verbose=params['verbose_fit'])
 425
 426             elif training==5:
 427                 print('training in one pass, resetting state after each epoch by callback')
 428                 model_fit.fit(x_train,
 429                     y_train + centering[1] ,
 430                     epochs=epochs,
 431                     batch_size=batch_size,
 432                     callbacks = callbacks + [ResetStatesCallback()],
 433                     verbose=params['verbose_fit'])
 434
 435         w_fitted=model_fit.get_weights()
 436         print('fitted weights hash =',hash2(w_fitted))
 437         if params['verbose_weights']:
 438             for i in range(len(w_fitted)):
 439                 print('weight',i,w_name[i],'shape',w[i].shape,'ndim',w[i].ndim,
 440                   'fitted: sum',np.sum(w_fitted[i],axis=0),'\nentries',w_fitted[i])
 441     else:
 442         print('Fitting skipped, using initial weights')
 443         w_fitted=w
 444
 445     model_predict.set_weights(w_fitted)
 446
 447     return model_predict
 448
 449 def get_initial_weights(model_fit,params,rnn_dat):
 450     # Given a RNN architecture and hyperparameter dictionary, return array of physics-initiated weights
 451     # Inputs:
 452     # model_fit: output of create_RNN_2 with no training
 453     # params: (dict) dictionary of hyperparameters
 454     # rnn_dat: (dict) data dictionary, output of create_rnn_dat
 455     # Returns: numpy ndarray of weights that should be a rough solution to the moisture ODE
 456     DeltaE = params['DeltaE']
 457     T1 = params['T1']
 458     fmr = params['fm_raise_vs_rain']
 459     centering = params['centering']  # shift activation down
 460
 461     w0_initial={'Ed':(1.-np.exp(-T1))/2,
 462                 'Ew':(1.-np.exp(-T1))/2,
 463                 'rain':fmr * rnn_dat['scale_fm']/rnn_dat['scale_rain']}   # wx - input feature
 464                                  #  wh      wb   wd    bd = bias -1
 465
 466     w_initial=np.array([np.nan, np.exp(-0.1), DeltaE[0]/rnn_dat['scale_fm'], # layer 0
 467                         1.0, -centering[0] + DeltaE[1]/rnn_dat['scale_fm']])                 # layer 1
 468     if params['verbose_weights']:
 469         print('Equilibrium moisture correction bias',DeltaE[0],
 470               'in the hidden layer and',DeltaE[1],' in the output layer')
 471
 472     w_name = ['wx','wh','bh','wd','bd']
 473
 474     w=model_fit.get_weights()
 475     for j in range(w[0].shape[0]):
 476             feature = rnn_dat['features_list'][j]
 477             for k in range(w[0].shape[1]):
 478                     w[0][j][k]=w0_initial[feature]
 479     for i in range(1,len(w)):            # number of the weight
 480         for j in range(w[i].shape[0]):   # number of the inputs
 481             if w[i].ndim==2:
 482                 # initialize all entries of the weight matrix to the same number
 483                 for k in range(w[i].shape[1]):
 484                     w[i][j][k]=w_initial[i]/w[i].shape[0]
 485             elif w[i].ndim==1:
 486                 w[i][j]=w_initial[i]
 487             else:
 488                 print('weight',i,'shape',w[i].shape)
 489                 raise ValueError("Only 1 or 2 dimensions supported")
 490         if params['verbose_weights']:
 491             print('weight',i,w_name[i],'shape',w[i].shape,'ndim',w[i].ndim,
 492                   'initial: sum',np.sum(w[i],axis=0),'\nentries',w[i])
 493
 494     return w, w_name
 495
 496 def rnn_predict(model, params, rnn_dat):
 497     # Given compiled model, hyperparameters, and data dictionary, return array of predicted values.
 498     # NOTE: this does not distinguish in-sample vs out-of-sample data, it just fits the model to the given data
 499     # Inputs:
 500     # model: compiled moisture model
 501     # params: (dict) dictionary of hyperparameters
 502     # rnn_dat: (dict) dictionary of data to fit model to, output of create_rnn_dat
 503     # Returns: numpy array of fitted moisture values
 504     verbose = params['verbose']
 505     centering = params['centering']
 506     features = rnn_dat['features']
 507     hours = rnn_dat['hours']
 508
 509     # model.set_weights(w_fitted)
 510     x_input=np.reshape(rnn_dat['Et'],(1, hours, features))
 511     y_output = model.predict(x_input, verbose = verbose) - centering[1]
 512
 513     vprint('x_input.shape=',x_input.shape,'y_output.shape=',y_output.shape)
 514
 515     m = np.reshape(y_output,hours)
 516     # print('weights=',w)
 517     if rnn_dat['scale']:
 518         vprint('scaling')
 519         m = m*rnn_dat['scale_fm']
 520     m = np.reshape(m,hours)
 521     return m
 522
 523 def run_rnn(case_data,params,fit=True,title2=''):
 524     # Run RNN on given case dictionary of FMDA data with certain params. Used internally in run_case
 525     # Inputs:
 526     # case_data: (dict) collection of cases with FMDA data. Cases are different locations and times, or synthetic data
 527     # params: (dict) collection of run options for model
 528     # fit: (bool) whether or not to fit RNN to data
 529     # title2: (str) title of RNN run to be joined to string with other info for plotting title
 530     verbose = params['verbose']
 531
 532     reproducibility.set_seed() # Set seed for reproducibility
 533     rnn_dat = create_rnn_data(case_data,params)
 534     if params['verbose']:
 535         check_data(rnn_dat,case=0,name='rnn_dat')
 536     model_predict = train_rnn(
 537         rnn_dat,
 538         params,
 539         rnn_dat['hours'],
 540         fit=fit
 541     )
 542
 543     m = rnn_predict(model_predict, params, rnn_dat)
 544     case_data['m'] = m
 545
 546     hv = hash2(model_predict.get_weights())
 547     if case_data['case']=='case11' and fit:
 548         hv5 = 5.55077327554663e+19
 549         print('check 5:',hv, 'should be',hv5,'error',hv-hv5)
 550         # assert (hv == hv5)
 551         checkm = case_data['m'][350]
 552         mv = 3.77920889854431152
 553         print('checkm=',format(checkm, '.17f'),' error',checkm-mv)
 554     else:
 555         print('check - hash weights:',hv)
 556
 557     plot_data(case_data,title2=title2)
 558     plt.show()
 559     return m, rmse_data(case_data)
 560
 561
 562 def run_case(case_data,params, check_data=False):
 563     # Given a formatted FMDA dictionary of data, run RNN model using given run params
 564     # Inputs:
 565     # case_data: (dict)
 566     # params: (dict) collection of run options for model
 567     # Return: (dict) collection of RMSE error for RNN & EKF, for train and prediction periods
 568     print('\n***** ',case_data['case'],' *****\n')
 569     case_data['rain'][np.isnan(case_data['rain'])] = 0
 570     if check_data:
 571         check_data(case_data)
 572     hours=case_data['hours']
 573     if 'train_frac' in params:
 574         case_data['h2'] = round(hours * params['train_frac'])
 575     h2=case_data['h2']
 576     plot_data(case_data,title2='case data on input')
 577     m,Ec = mod.run_augmented_kf(case_data)  # extract from state
 578     case_data['m']=m
 579     case_data['Ec']=Ec
 580     plot_data(case_data,title2='augmented KF')
 581     rmse =      {'Augmented KF':rmse_data(case_data)}
 582     del case_data['Ec']  # cleanup
 583     rmse.update({'RNN initial':run_rnn(case_data,params,fit=False,title2='with initial weights, no fit')[1]})
 584     rmse.update({'RNN trained':run_rnn(case_data,params,fit=True,title2='with trained RNN')[1]})
 585     return rmse
 586
 587
 588
 589