fmda/data_funcs.py

   1 ## Set of Functions to process and format fuel moisture model inputs
   2 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   3
   4 import numpy as np, random
   5 from numpy.random import rand
   6 from MesoPy import Meso
   7
   8 import matplotlib.pyplot as plt
   9 from moisture_models import model_decay, model_moisture
  10 from datetime import datetime, timedelta
  11 import json
  12 from utils import hash2
  13
  14 def to_json(dic,filename):
  15     print('writing ',filename)
  16     check_data(dic)
  17     new={}
  18     for i in dic:
  19         if type(dic[i]) is np.ndarray:
  20             new[i]=dic[i].tolist()  # because numpy.ndarray is not serializable
  21         else:
  22             new[i]=dic[i]
  23         # print('i',type(new[i]))
  24     new['filename']=filename
  25     print('Hash: ', hash2(new))
  26     json.dump(new,open(filename,'w'),indent=4)
  27
  28 def from_json(filename):
  29     print('reading ',filename)
  30     dic=json.load(open(filename,'r'))
  31     new={}
  32     for i in dic:
  33         if type(dic[i]) is list:
  34             new[i]=np.array(dic[i])  # because ndarray is not serializable
  35         else:
  36             new[i]=dic[i]
  37     check_data(new)
  38     print('Hash: ', hash2(new))
  39     return new
  40
  41 # Function to simulate moisture data and equilibrium for model testing
  42 def create_synthetic_data(days=20,power=4,data_noise=0.02,process_noise=0.0,DeltaE=0.0):
  43     hours = days*24
  44     h2 = int(hours/2)
  45     hour = np.array(range(hours))
  46     day = np.array(range(hours))/24.
  47
  48     # artificial equilibrium data
  49     E = 100.0*np.power(np.sin(np.pi*day),4) # diurnal curve
  50     E = 0.05+0.25*E
  51     # FMC free run
  52     m_f = np.zeros(hours)
  53     m_f[0] = 0.1         # initial FMC
  54     process_noise=0.
  55     for t in range(hours-1):
  56         m_f[t+1] = max(0.,model_decay(m_f[t],E[t])  + random.gauss(0,process_noise) )
  57     data = m_f + np.random.normal(loc=0,scale=data_noise,size=hours)
  58     E = E + DeltaE
  59     return E,m_f,data,hour,h2,DeltaE
  60
  61 # the following input or output dictionary with all model data and variables
  62
  63 def check_data_array(dat,hours,a,s):
  64     if a in dat:
  65         ar = dat[a]
  66         print("array %s %s length %i min %s max %s %s" % (a,s,len(ar),min(ar),max(ar),type(ar)))
  67         if hours is not None:
  68             if len(ar) < hours:
  69                 print('len(%a) = %i does not equal to hours = %i' % (a,len(ar),hours))
  70                 exit(1)
  71     else:
  72         print(a + ' not present')
  73
  74 def check_data_scalar(dat,a):
  75     if a in dat:
  76         print('%s = %s' % (a,dat[a]),' ',type(dat[a]))
  77     else:
  78         print(a + ' not present' )
  79
  80 def check_data(dat):
  81     check_data_scalar(dat,'filename')
  82     check_data_scalar(dat,'title')
  83     check_data_scalar(dat,'note')
  84     check_data_scalar(dat,'hours')
  85     check_data_scalar(dat,'h2')
  86     if 'hours' in dat:
  87         hours = dat['hours']
  88     else:
  89         hours = None
  90     check_data_array(dat,hours,'E','drying equilibrium (%)')
  91     check_data_array(dat,hours,'Ed','drying equilibrium (%)')
  92     check_data_array(dat,hours,'Ew','wetting equilibrium (%)')
  93     check_data_array(dat,hours,'Ec','equilibrium equilibrium (%)')
  94     check_data_array(dat,hours,'rain','rain intensity (mm/h)')
  95     check_data_array(dat,hours,'fm','RAWS fuel moisture data (%)')
  96     check_data_array(dat,hours,'m','fuel moisture estimate (%)')
  97
  98 def synthetic_data(days=20,power=4,data_noise=0.02,process_noise=0.0,
  99     DeltaE=0.0,Emin=5,Emax=30,p_rain=0.01,max_rain=10.0):
 100     hours = days*24
 101     h2 = int(hours/2)
 102     hour = np.array(range(hours))
 103     day = np.array(range(hours))/24.
 104     # artificial equilibrium data
 105     E = np.power(np.sin(np.pi*day),power) # diurnal curve betwen 0 and 1
 106     E = Emin+(Emax - Emin)*E
 107     E = E + DeltaE
 108     Ed=E+0.5
 109     Ew=np.maximum(E-0.5,0)
 110     rain = np.multiply(rand(hours) < p_rain, rand(hours)*max_rain)
 111     # FMC free run
 112     m_f = np.zeros(hours)
 113     m_f[0] = 0.1         # initial FMC
 114     # process_noise=0.
 115     for t in range(hours-1):
 116         m_f[t+1] = max(0.,model_moisture(m_f[t],Ed[t-1],Ew[t-1],rain[t-1])  + random.gauss(0,process_noise))
 117     m_f = m_f + np.random.normal(loc=0,scale=data_noise,size=hours)
 118     dat = {'E':E,'Ew':Ew,'Ed':Ed,'m_f':m_f,'hours':hours,'h2':h2,'DeltaE':DeltaE,'rain':rain,'title':'Synthetic data'}
 119
 120     return dat
 121
 122 def plot_one(hmin,hmax,dat,name,linestyle,c,label,type='plot'):
 123 # helper for plot_data
 124     if name in dat:
 125         h = len(dat[name])
 126         if hmin is None:
 127             hmin=0
 128         if hmax is None:
 129             hmax=len(dat[name])
 130         hour = np.array(range(hmin,hmax))
 131         if type=='plot':
 132             plt.plot(hour,dat[name][hmin:hmax],linestyle=linestyle,c=c,label=label)
 133         elif type=='scatter':
 134             plt.scatter(hour,dat[name][hmin:hmax],linestyle=linestyle,c=c,label=label)
 135
 136 def plot_data(dat,title=None,title2=None,hmin=None,hmax=None):
 137     if 'hours' in dat:
 138         if hmax is None:
 139             hmax = dat['hours']
 140         else:
 141             hmax = min(hmax, dat['hours'])
 142     plt.figure(figsize=(16,4))
 143     plot_one(hmin,hmax,dat,'E',linestyle='--',c='r',label='equilibrium')
 144     plot_one(hmin,hmax,dat,'Ed',linestyle='--',c='r',label='drying equilibrium')
 145     plot_one(hmin,hmax,dat,'Ew',linestyle='--',c='b',label='wetting equilibrium')
 146     plot_one(hmin,hmax,dat,'m_f',linestyle='-',c='g',label='FMC truth')
 147     plot_one(hmin,hmax,dat,'fm',linestyle=':',c='b',label='FMC observation')
 148     plot_one(hmin,hmax,dat,'m',linestyle='-',c='k',label='FMC estimate')
 149     plot_one(hmin,hmax,dat,'Ec',linestyle='-',c='g',label='equilibrium correction')
 150     plot_one(hmin,hmax,dat,'rain',linestyle='-',c='b',label='rain intensity')
 151     if title is not None:
 152         t = title
 153     else:
 154         t=dat['title']
 155         # print('title',type(t),t)
 156     if title2 is not None:
 157         t = t + ' ' + title2
 158     plt.title(t)
 159     plt.xlabel('Time (hours)')
 160     if 'rain' in dat:
 161         plt.ylabel('FMC (%) / Rain mm/h')
 162     else:
 163         plt.ylabel('Fuel moisture content (%)')
 164     plt.legend()
 165
 166 # Calculate mean squared error
 167 def mse(a, b):
 168     return ((a - b)**2).mean()
 169
 170 # Calculate mean absolute error
 171 def mape(a, b):
 172     return ((a - b).__abs__()).mean()
 173
 174 def mse_data(dat):
 175     h2 = dat['h2']
 176     hours = dat['hours']
 177     m = dat['m']
 178     fm = dat['fm']
 179     print('Training MSE:   ' + str(np.round(mse(m[:h2], fm[:h2]), 4)))
 180     print('Prediction MSE: ' + str(np.round(mse(m[h2:hours], fm[h2:hours]), 4)))
 181
 182 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 183
 184 ## RAWS Data Functions
 185
 186 def format_raws(stn, fixnames = True):
 187     raws_dat = stn['OBSERVATIONS']
 188
 189     # Convert to Numpy arrays, check data type for floats
 190     for key in [*stn['OBSERVATIONS'].keys()]:
 191         if type(stn['OBSERVATIONS'][key][0]) is float:
 192             raws_dat[key] = np.array(stn['OBSERVATIONS'][key], dtype = 'float64')
 193         else:
 194             raws_dat[key] = np.array(stn['OBSERVATIONS'][key])
 195
 196     # Transform Data
 197     raws_dat['air_temp_set_1'] = raws_dat['air_temp_set_1'] + 273.15 ## convert C to K
 198     if 'precip_accum_set_1' in raws_dat.keys():
 199         raws_dat['precip_accum_set_1'] = format_precip(raws_dat['precip_accum_set_1']) ## format precip data, accumulated to hourly
 200
 201
 202     # Calculate Equilibrium Temps
 203     raws_dat['Ed'] = 0.924*raws_dat['relative_humidity_set_1']**0.679 + 0.000499*np.exp(0.1*raws_dat['relative_humidity_set_1']) + 0.18*(21.1 + 273.15 - raws_dat['air_temp_set_1'])*(1 - np.exp(-0.115*raws_dat['relative_humidity_set_1']))
 204     raws_dat['Ew'] = 0.618*raws_dat['relative_humidity_set_1']**0.753 + 0.000454*np.exp(0.1*raws_dat['relative_humidity_set_1']) + 0.18*(21.1 + 273.15 - raws_dat['air_temp_set_1'])*(1 - np.exp(-0.115*raws_dat['relative_humidity_set_1']))
 205
 206     # Fix nan values
 207     for key in [*raws_dat.keys()]:
 208         if type(raws_dat[key][0]) is float:
 209             raws_dat[key] = fixnan(raws_dat[key], 2)
 210
 211     # Simplify names
 212     if fixnames:
 213         var_mapping = {
 214             'date_time': 'time', 'precip_accum': 'rain',
 215             'fuel_moisture': 'fm', 'relative_humidity': 'rh',
 216             'air_temp': 'temp', 'Ed': 'Ed', 'Ew': 'Ew'
 217             }
 218         old_keys = [*raws_dat.keys()]
 219         old_keys = [k.replace("_set_1", "") for k in old_keys]
 220         new_keys = []
 221         for key in old_keys:
 222             new_keys.append(var_mapping.get(key, key))
 223         old_keys = [*raws_dat.keys()]
 224         old_keys = [k.replace("_set_1", "") for k in old_keys]
 225         new_keys = []
 226         for key in old_keys:
 227             new_keys.append(var_mapping.get(key, key))
 228         raws_dat2 = dict(zip(new_keys, list(raws_dat.values())))
 229         return raws_dat2
 230
 231     else: return raws_dat
 232
 233 def format_precip(precipa):
 234     rain=np.array(precipa, dtype = 'float64')
 235     rain = np.diff(rain) # first difference to convert accumulated to hourly
 236     rain = np.insert(rain, 0, [np.NaN]) # add NaN entry to account for diff
 237     rain[rain > 1000] = np.NaN # filter out erroneously high
 238     rain[rain < 0] = np.NaN # filter out negative, results from diff function after precipa goes to zero
 239     return rain
 240
 241 # fix isolated nans
 242 def fixnan(a,n):
 243     for c in range(n):
 244         for i in np.where(np.isnan(a)):
 245             a[i]=0.5*(a[i-1]+a[i+1])
 246         if not any(np.isnan(a)):
 247             break
 248     return a
 249
 250 def retrieve_raws(mes, stid, raws_vars, time1, time2):
 251     meso_ts = mes.timeseries(time1, time2,
 252                        stid=stid, vars=raws_vars)
 253     station = meso_ts['STATION'][0]
 254
 255     raws_dat = format_raws(station)
 256
 257     return station, raws_dat
 258
 259 def raws_data(start=None, hours=None, h2=None, stid=None,meso_token=None):
 260     # input:
 261     #   start YYYYMMDDhhmm
 262     #   hours legth of the period
 263     #   h2 (optional) length of the training period
 264     #   stid  the station id
 265     time_start=start
 266     time_end = datetime.strptime(start, "%Y%m%d%H%M") + timedelta(hours = hours+1) # end time, plus a buffer to control for time shift
 267     time_end = str(int(time_end.strftime("%Y%m%d%H%M")))
 268     print('data_raws: Time Parameters:')
 269     print('-'*50)
 270     print('Time Start:', datetime.strptime(time_start, "%Y%m%d%H%M").strftime("%Y/%M/%d %H:%M"))
 271     print('Time End:', datetime.strptime(time_end, "%Y%m%d%H%M").strftime("%Y/%M/%d %H:%M"))
 272     print('Total Runtime:', hours, 'hours')
 273     print('Training Time:', h2, 'hours')
 274     print('-'*50)
 275     raws_vars='air_temp,relative_humidity,precip_accum,fuel_moisture'
 276     m=Meso(meso_token)
 277     station, raws_dat = retrieve_raws(m, stid, raws_vars, time_start, time_end)
 278     raws_dat['title']='RAWS data station ' + stid
 279     raws_dat.update({'hours':hours,'h2':h2,'station':station})
 280     print('Data Read:')
 281     print('-'*50)
 282     print('Station ID:', station['STID'])
 283     print('Lat / Lon:', station['LATITUDE'],', ',station['LONGITUDE'])
 284     if(station['QC_FLAGGED']): print('WARNING: station flagged for QC')
 285     print('-'*50)
 286     raws_dat.update({'hours':hours,'h2':h2})
 287     return raws_dat
 288
 289 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~