fmda/data_funcs.py

   1 ## Set of Functions to process and format fuel moisture model inputs
   2 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   3
   4 import numpy as np, random
   5 from numpy.random import rand
   6 from MesoPy import Meso
   7
   8 import matplotlib.pyplot as plt
   9 from moisture_models import model_decay, model_moisture
  10 from datetime import datetime, timedelta
  11 import json
  12
  13 # Helper Functions
  14 verbose = False ## Must be declared in environment
  15 def vprint(*args):
  16     if verbose:
  17         for s in args[:(len(args)-1)]:
  18             print(s, end=' ')
  19         print(args[-1])
  20
  21 def to_json(dic,filename):
  22     print('writing ',filename)
  23     check_data(dic)
  24     new={}
  25     type_nparray=type(np.array([]))
  26     for i in dic:
  27         if type(dic[i]) == type_nparray:
  28             new[i]=dic[i].tolist()  # because numpy.ndarray is not serializable
  29         else:
  30             new[i]=dic[i]
  31         # print('i',type(new[i]))
  32     json.dump(new,open(filename,'w'),indent=4)
  33
  34 def from_json(filename):
  35     print('reading ',filename)
  36     dic=json.load(open(filename,'r'))
  37     new={}
  38     for i in dic:
  39         if type(dic[i]) is list:
  40             new[i]=np.array(dic[i])  # because ndarray is not serializable
  41         else:
  42             new[i]=dic[i]
  43     check_data(new)
  44     return new
  45
  46 # Function to simulate moisture data and equilibrium for model testing
  47 def create_synthetic_data(days=20,power=4,data_noise=0.02,process_noise=0.0,DeltaE=0.0):
  48     hours = days*24
  49     h2 = int(hours/2)
  50     hour = np.array(range(hours))
  51     day = np.array(range(hours))/24.
  52
  53     # artificial equilibrium data
  54     E = 100.0*np.power(np.sin(np.pi*day),4) # diurnal curve
  55     E = 0.05+0.25*E
  56     # FMC free run
  57     m_f = np.zeros(hours)
  58     m_f[0] = 0.1         # initial FMC
  59     process_noise=0.
  60     for t in range(hours-1):
  61         m_f[t+1] = max(0.,model_decay(m_f[t],E[t])  + random.gauss(0,process_noise) )
  62     data = m_f + np.random.normal(loc=0,scale=data_noise,size=hours)
  63     E = E + DeltaE
  64     return E,m_f,data,hour,h2,DeltaE
  65
  66 # the following input or output dictionary with all model data and variables
  67
  68 def check_data_array(dat,h,a,s):
  69     if a in dat:
  70         ar = dat[a]
  71         print("array %s %s length %i min %s max %s %s" % (a,s,len(ar),min(ar),max(ar),type(ar)))
  72         if h is not None:
  73             if len(ar) < h:
  74                 print('Warning: len(%a) < %i' % (a,ho))
  75                 exit(1)
  76     else:
  77         print(a + ' not present')
  78
  79 def check_data_scalar(dat,a):
  80     if a in dat:
  81         print('%s = %s' % (a,dat[a]),' ',type(dat[a]))
  82     else:
  83         print(a + ' not present' )
  84
  85 def check_data(dat,hours=None):
  86     check_data_scalar(dat,'hours')
  87     check_data_scalar(dat,'h2')
  88     if hours is None:
  89         hours = dat['hours']
  90     else:
  91         print('specified hours=%s for check_data' % hours)
  92     check_data_array(dat,hours,'E','drying equilibrium (%)')
  93     check_data_array(dat,hours,'Ed','drying equilibrium (%)')
  94     check_data_array(dat,hours,'Ew','wetting equilibrium (%)')
  95     check_data_array(dat,hours,'Ec','equilibrium equilibrium (%)')
  96     check_data_array(dat,hours,'rain','rain intensity (mm/h)')
  97     check_data_array(dat,hours,'fm','RAWS fuel moisture data (%)')
  98     check_data_array(dat,hours,'m','fuel moisture estimate (%)')
  99     check_data_array(dat,hours,'m_rnn','RNN fuel moisture estimate (%)')
 100     check_data_array(dat,hours,'m_kf','KF fuel moisture estimate (%)')
 101
 102
 103 def synthetic_data(days=20,power=4,data_noise=0.02,process_noise=0.0,
 104     DeltaE=0.0,Emin=5,Emax=30,p_rain=0.01,max_rain=10.0):
 105     hours = days*24
 106     h2 = int(hours/2)
 107     hour = np.array(range(hours))
 108     day = np.array(range(hours))/24.
 109     # artificial equilibrium data
 110     E = np.power(np.sin(np.pi*day),power) # diurnal curve betwen 0 and 1
 111     E = Emin+(Emax - Emin)*E
 112     E = E + DeltaE
 113     Ed=E+0.5
 114     Ew=np.maximum(E-0.5,0)
 115     rain = np.multiply(rand(hours) < p_rain, rand(hours)*max_rain)
 116     # FMC free run
 117     m_f = np.zeros(hours)
 118     m_f[0] = 0.1         # initial FMC
 119     # process_noise=0.
 120     for t in range(hours-1):
 121         m_f[t+1] = max(0.,model_moisture(m_f[t],Ed[t-1],Ew[t-1],rain[t-1])  + random.gauss(0,process_noise))
 122     m_f = m_f + np.random.normal(loc=0,scale=data_noise,size=hours)
 123     dat = {'E':E,'Ew':Ew,'Ed':Ed,'m_f':m_f,'hours':hours,'h2':h2,'DeltaE':DeltaE,'rain':rain,'title':'Synthetic data'}
 124
 125     return dat
 126
 127 def plot_one(hmin,hmax,dat,name,linestyle,c,label,type='plot'):
 128 # helper for plot_data
 129     if name in dat:
 130         h = len(dat[name])
 131         if hmin is None:
 132             hmin=0
 133         if hmax is None:
 134             hmax=len(dat[name])
 135         hour = np.array(range(hmin,hmax))
 136         if type=='plot':
 137             plt.plot(hour,dat[name][hmin:hmax],linestyle=linestyle,c=c,label=label)
 138         elif type=='scatter':
 139             plt.scatter(hour,dat[name][hmin:hmax],linestyle=linestyle,c=c,label=label)
 140
 141 def plot_data(dat,title=None,title2=None,hmin=None,hmax=None):
 142     if 'hours' in dat:
 143         if hmax is None:
 144             hmax = dat['hours']
 145         else:
 146             hmax = max(hmax, dat['hours'])
 147     plt.figure(figsize=(16,4))
 148     plot_one(hmin,hmax,dat,'E',linestyle='--',c='r',label='equilibrium')
 149     plot_one(hmin,hmax,dat,'Ed',linestyle='--',c='r',label='drying equilibrium')
 150     plot_one(hmin,hmax,dat,'Ew',linestyle='--',c='b',label='wetting equilibrium')
 151     plot_one(hmin,hmax,dat,'m_f',linestyle='-',c='g',label='FMC truth')
 152     plot_one(hmin,hmax,dat,'fm',linestyle=':',c='b',label='FMC observation')
 153     plot_one(hmin,hmax,dat,'m',linestyle='-',c='k',label='FMC estimate')
 154     plot_one(hmin,hmax,dat,'Ec',linestyle='-',c='g',label='equilibrium correction')
 155     plot_one(hmin,hmax,dat,'rain',linestyle='-',c='b',label='rain intensity')
 156     if title is not None:
 157         t = title
 158     else:
 159         t=dat['title']
 160         # print('title',type(t),t)
 161     if title2 is not None:
 162         t = t + ' ' + title2
 163     plt.title(t)
 164     plt.xlabel('Time (hours)')
 165     if 'rain' in dat:
 166         plt.ylabel('FMC (%) / Rain mm/h')
 167     else:
 168         plt.ylabel('Fuel moisture content (%)')
 169     plt.legend()
 170
 171 # Calculate mean squared error
 172 def mse(a, b):
 173     return ((a - b)**2).mean()
 174
 175 # Calculate mean absolute error
 176 def mape(a, b):
 177     return ((a - b).__abs__()).mean()
 178
 179 def mse_data(dat):
 180     h2 = dat['h2']
 181     hours = dat['hours']
 182     m = dat['m']
 183     fm = dat['fm']
 184     print('Training MSE:   ' + str(np.round(mse(m[:h2], fm[:h2]), 4)))
 185     print('Prediction MSE: ' + str(np.round(mse(m[h2:hours], fm[h2:hours]), 4)))
 186
 187 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 188
 189 ## RAWS Data Functions
 190
 191 def format_raws(stn, fixnames = True):
 192     raws_dat = stn['OBSERVATIONS']
 193
 194     # Convert to Numpy arrays, check data type for floats
 195     for key in [*stn['OBSERVATIONS'].keys()]:
 196         if type(stn['OBSERVATIONS'][key][0]) is float:
 197             raws_dat[key] = np.array(stn['OBSERVATIONS'][key], dtype = 'float64')
 198         else:
 199             raws_dat[key] = np.array(stn['OBSERVATIONS'][key])
 200
 201     # Transform Data
 202     raws_dat['air_temp_set_1'] = raws_dat['air_temp_set_1'] + 273.15 ## convert C to K
 203     if 'precip_accum_set_1' in raws_dat.keys():
 204         raws_dat['precip_accum_set_1'] = format_precip(raws_dat['precip_accum_set_1']) ## format precip data, accumulated to hourly
 205
 206
 207     # Calculate Equilibrium Temps
 208     raws_dat['Ed'] = 0.924*raws_dat['relative_humidity_set_1']**0.679 + 0.000499*np.exp(0.1*raws_dat['relative_humidity_set_1']) + 0.18*(21.1 + 273.15 - raws_dat['air_temp_set_1'])*(1 - np.exp(-0.115*raws_dat['relative_humidity_set_1']))
 209     raws_dat['Ew'] = 0.618*raws_dat['relative_humidity_set_1']**0.753 + 0.000454*np.exp(0.1*raws_dat['relative_humidity_set_1']) + 0.18*(21.1 + 273.15 - raws_dat['air_temp_set_1'])*(1 - np.exp(-0.115*raws_dat['relative_humidity_set_1']))
 210
 211     # Fix nan values
 212     for key in [*raws_dat.keys()]:
 213         if type(raws_dat[key][0]) is float:
 214             raws_dat[key] = fixnan(raws_dat[key], 2)
 215
 216     # Simplify names
 217     if fixnames:
 218         var_mapping = {
 219             'date_time': 'time', 'precip_accum': 'rain',
 220             'fuel_moisture': 'fm', 'relative_humidity': 'rh',
 221             'air_temp': 'temp', 'Ed': 'Ed', 'Ew': 'Ew'
 222             }
 223         old_keys = [*raws_dat.keys()]
 224         old_keys = [k.replace("_set_1", "") for k in old_keys]
 225         new_keys = []
 226         for key in old_keys:
 227             new_keys.append(var_mapping.get(key, key))
 228         old_keys = [*raws_dat.keys()]
 229         old_keys = [k.replace("_set_1", "") for k in old_keys]
 230         new_keys = []
 231         for key in old_keys:
 232             new_keys.append(var_mapping.get(key, key))
 233         raws_dat2 = dict(zip(new_keys, list(raws_dat.values())))
 234         return raws_dat2
 235
 236     else: return raws_dat
 237
 238 def format_precip(precipa):
 239     rain=np.array(precipa, dtype = 'float64')
 240     rain = np.diff(rain) # first difference to convert accumulated to hourly
 241     rain = np.insert(rain, 0, [np.NaN]) # add NaN entry to account for diff
 242     rain[rain > 1000] = np.NaN # filter out erroneously high
 243     rain[rain < 0] = np.NaN # filter out negative, results from diff function after precipa goes to zero
 244     return rain
 245
 246 # fix isolated nans
 247 def fixnan(a,n):
 248     for c in range(n):
 249         for i in np.where(np.isnan(a)):
 250             a[i]=0.5*(a[i-1]+a[i+1])
 251         if not any(np.isnan(a)):
 252             break
 253     return a
 254
 255 def retrieve_raws(mes, stid, raws_vars, time1, time2):
 256     meso_ts = mes.timeseries(time1, time2,
 257                        stid=stid, vars=raws_vars)
 258     station = meso_ts['STATION'][0]
 259
 260     raws_dat = format_raws(station)
 261
 262     return station, raws_dat
 263
 264 def raws_data(start=None, hours=None, h2=None, stid=None,meso_token=None):
 265     # input:
 266     #   start YYYYMMDDhhmm
 267     #   hours legth of the period
 268     #   h2 (optional) length of the training period
 269     #   stid  the station id
 270     time_start=start
 271     time_end = datetime.strptime(start, "%Y%m%d%H%M") + timedelta(hours = hours+1) # end time, plus a buffer to control for time shift
 272     time_end = str(int(time_end.strftime("%Y%m%d%H%M")))
 273     print('data_raws: Time Parameters:')
 274     print('-'*50)
 275     print('Time Start:', datetime.strptime(time_start, "%Y%m%d%H%M").strftime("%Y/%M/%d %H:%M"))
 276     print('Time End:', datetime.strptime(time_end, "%Y%m%d%H%M").strftime("%Y/%M/%d %H:%M"))
 277     print('Total Runtime:', hours, 'hours')
 278     print('Training Time:', h2, 'hours')
 279     print('-'*50)
 280     raws_vars='air_temp,relative_humidity,precip_accum,fuel_moisture'
 281     m=Meso(meso_token)
 282     station, raws_dat = retrieve_raws(m, stid, raws_vars, time_start, time_end)
 283     raws_dat['title']='RAWS data station ' + stid
 284     raws_dat.update({'hours':hours,'h2':h2,'station':station})
 285     print('Data Read:')
 286     print('-'*50)
 287     print('Station ID:', station['STID'])
 288     print('Lat / Lon:', station['LATITUDE'],', ',station['LONGITUDE'])
 289     if(station['QC_FLAGGED']): print('WARNING: station flagged for QC')
 290     print('-'*50)
 291     raws_dat.update({'hours':hours,'h2':h2})
 292     return raws_dat
 293
 294 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~