fmda_kf_rnn_orig passes date to fmda_rnn_rain, but different results
[notebooks.git] / fmda / data_funcs.py
blobc81b6179ca19a6feb219ec3465da0fb04d6126c5
1 ## Set of Functions to process and format fuel moisture model inputs
2 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4 import numpy as np, random
5 from numpy.random import rand
6 from MesoPy import Meso
8 import matplotlib.pyplot as plt
9 from moisture_models import model_decay, model_moisture
10 from datetime import datetime, timedelta
11 import json
13 # Helper Functions
14 verbose = False ## Must be declared in environment
15 def vprint(*args):
16 if verbose:
17 for s in args[:(len(args)-1)]:
18 print(s, end=' ')
19 print(args[-1])
21 def to_json(dic,filename):
22 print('writing ',filename)
23 check_data(dic)
24 new={}
25 type_nparray=type(np.array([]))
26 for i in dic:
27 if type(dic[i]) == type_nparray:
28 new[i]=dic[i].tolist() # because numpy.ndarray is not serializable
29 else:
30 new[i]=dic[i]
31 # print('i',type(new[i]))
32 json.dump(new,open(filename,'w'),indent=4)
34 def from_json(filename):
35 print('reading ',filename)
36 dic=json.load(open(filename,'r'))
37 new={}
38 for i in dic:
39 if type(dic[i]) is list:
40 new[i]=np.array(dic[i]) # because ndarray is not serializable
41 else:
42 new[i]=dic[i]
43 check_data(new)
44 return new
46 # Function to simulate moisture data and equilibrium for model testing
47 def create_synthetic_data(days=20,power=4,data_noise=0.02,process_noise=0.0,DeltaE=0.0):
48 hours = days*24
49 h2 = int(hours/2)
50 hour = np.array(range(hours))
51 day = np.array(range(hours))/24.
53 # artificial equilibrium data
54 E = 100.0*np.power(np.sin(np.pi*day),4) # diurnal curve
55 E = 0.05+0.25*E
56 # FMC free run
57 m_f = np.zeros(hours)
58 m_f[0] = 0.1 # initial FMC
59 process_noise=0.
60 for t in range(hours-1):
61 m_f[t+1] = max(0.,model_decay(m_f[t],E[t]) + random.gauss(0,process_noise) )
62 data = m_f + np.random.normal(loc=0,scale=data_noise,size=hours)
63 E = E + DeltaE
64 return E,m_f,data,hour,h2,DeltaE
66 # the following input or output dictionary with all model data and variables
68 def check_data_array(dat,h,a,s):
69 if a in dat:
70 ar = dat[a]
71 print("array %s %s length %i min %s max %s %s" % (a,s,len(ar),min(ar),max(ar),type(ar)))
72 if h is not None:
73 if len(ar) < h:
74 print('Warning: len(%a) < %i' % (a,ho))
75 exit(1)
76 else:
77 print(a + ' not present')
79 def check_data_scalar(dat,a):
80 if a in dat:
81 print('%s = %s' % (a,dat[a]),' ',type(dat[a]))
82 else:
83 print(a + ' not present' )
85 def check_data(dat,hours=None):
86 check_data_scalar(dat,'hours')
87 check_data_scalar(dat,'h2')
88 if hours is None:
89 hours = dat['hours']
90 else:
91 print('specified hours=%s for check_data' % hours)
92 check_data_array(dat,hours,'E','drying equilibrium (%)')
93 check_data_array(dat,hours,'Ed','drying equilibrium (%)')
94 check_data_array(dat,hours,'Ew','wetting equilibrium (%)')
95 check_data_array(dat,hours,'Ec','equilibrium equilibrium (%)')
96 check_data_array(dat,hours,'rain','rain intensity (mm/h)')
97 check_data_array(dat,hours,'fm','RAWS fuel moisture data (%)')
98 check_data_array(dat,hours,'m','fuel moisture estimate (%)')
99 check_data_array(dat,hours,'m_rnn','RNN fuel moisture estimate (%)')
100 check_data_array(dat,hours,'m_kf','KF fuel moisture estimate (%)')
103 def synthetic_data(days=20,power=4,data_noise=0.02,process_noise=0.0,
104 DeltaE=0.0,Emin=5,Emax=30,p_rain=0.01,max_rain=10.0):
105 hours = days*24
106 h2 = int(hours/2)
107 hour = np.array(range(hours))
108 day = np.array(range(hours))/24.
109 # artificial equilibrium data
110 E = np.power(np.sin(np.pi*day),power) # diurnal curve betwen 0 and 1
111 E = Emin+(Emax - Emin)*E
112 E = E + DeltaE
113 Ed=E+0.5
114 Ew=np.maximum(E-0.5,0)
115 rain = np.multiply(rand(hours) < p_rain, rand(hours)*max_rain)
116 # FMC free run
117 m_f = np.zeros(hours)
118 m_f[0] = 0.1 # initial FMC
119 # process_noise=0.
120 for t in range(hours-1):
121 m_f[t+1] = max(0.,model_moisture(m_f[t],Ed[t-1],Ew[t-1],rain[t-1]) + random.gauss(0,process_noise))
122 m_f = m_f + np.random.normal(loc=0,scale=data_noise,size=hours)
123 dat = {'E':E,'Ew':Ew,'Ed':Ed,'m_f':m_f,'hours':hours,'h2':h2,'DeltaE':DeltaE,'rain':rain,'title':'Synthetic data'}
125 return dat
127 def plot_one(hmin,hmax,dat,name,linestyle,c,label,type='plot'):
128 # helper for plot_data
129 if name in dat:
130 h = len(dat[name])
131 if hmin is None:
132 hmin=0
133 if hmax is None:
134 hmax=len(dat[name])
135 hour = np.array(range(hmin,hmax))
136 if type=='plot':
137 plt.plot(hour,dat[name][hmin:hmax],linestyle=linestyle,c=c,label=label)
138 elif type=='scatter':
139 plt.scatter(hour,dat[name][hmin:hmax],linestyle=linestyle,c=c,label=label)
141 def plot_data(dat,title=None,title2=None,hmin=None,hmax=None):
142 if 'hours' in dat:
143 if hmax is None:
144 hmax = dat['hours']
145 else:
146 hmax = max(hmax, dat['hours'])
147 plt.figure(figsize=(16,4))
148 plot_one(hmin,hmax,dat,'E',linestyle='--',c='r',label='equilibrium')
149 plot_one(hmin,hmax,dat,'Ed',linestyle='--',c='r',label='drying equilibrium')
150 plot_one(hmin,hmax,dat,'Ew',linestyle='--',c='b',label='wetting equilibrium')
151 plot_one(hmin,hmax,dat,'m_f',linestyle='-',c='g',label='FMC truth')
152 plot_one(hmin,hmax,dat,'fm',linestyle=':',c='b',label='FMC observation')
153 plot_one(hmin,hmax,dat,'m',linestyle='-',c='k',label='FMC estimate')
154 plot_one(hmin,hmax,dat,'Ec',linestyle='-',c='g',label='equilibrium correction')
155 plot_one(hmin,hmax,dat,'rain',linestyle='-',c='b',label='rain intensity')
156 if title is not None:
157 t = title
158 else:
159 t=dat['title']
160 # print('title',type(t),t)
161 if title2 is not None:
162 t = t + ' ' + title2
163 plt.title(t)
164 plt.xlabel('Time (hours)')
165 if 'rain' in dat:
166 plt.ylabel('FMC (%) / Rain mm/h')
167 else:
168 plt.ylabel('Fuel moisture content (%)')
169 plt.legend()
171 # Calculate mean squared error
172 def mse(a, b):
173 return ((a - b)**2).mean()
175 # Calculate mean absolute error
176 def mape(a, b):
177 return ((a - b).__abs__()).mean()
179 def mse_data(dat):
180 h2 = dat['h2']
181 hours = dat['hours']
182 m = dat['m']
183 fm = dat['fm']
184 print('Training MSE: ' + str(np.round(mse(m[:h2], fm[:h2]), 4)))
185 print('Prediction MSE: ' + str(np.round(mse(m[h2:hours], fm[h2:hours]), 4)))
187 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
189 ## RAWS Data Functions
191 def format_raws(stn, fixnames = True):
192 raws_dat = stn['OBSERVATIONS']
194 # Convert to Numpy arrays, check data type for floats
195 for key in [*stn['OBSERVATIONS'].keys()]:
196 if type(stn['OBSERVATIONS'][key][0]) is float:
197 raws_dat[key] = np.array(stn['OBSERVATIONS'][key], dtype = 'float64')
198 else:
199 raws_dat[key] = np.array(stn['OBSERVATIONS'][key])
201 # Transform Data
202 raws_dat['air_temp_set_1'] = raws_dat['air_temp_set_1'] + 273.15 ## convert C to K
203 if 'precip_accum_set_1' in raws_dat.keys():
204 raws_dat['precip_accum_set_1'] = format_precip(raws_dat['precip_accum_set_1']) ## format precip data, accumulated to hourly
207 # Calculate Equilibrium Temps
208 raws_dat['Ed'] = 0.924*raws_dat['relative_humidity_set_1']**0.679 + 0.000499*np.exp(0.1*raws_dat['relative_humidity_set_1']) + 0.18*(21.1 + 273.15 - raws_dat['air_temp_set_1'])*(1 - np.exp(-0.115*raws_dat['relative_humidity_set_1']))
209 raws_dat['Ew'] = 0.618*raws_dat['relative_humidity_set_1']**0.753 + 0.000454*np.exp(0.1*raws_dat['relative_humidity_set_1']) + 0.18*(21.1 + 273.15 - raws_dat['air_temp_set_1'])*(1 - np.exp(-0.115*raws_dat['relative_humidity_set_1']))
211 # Fix nan values
212 for key in [*raws_dat.keys()]:
213 if type(raws_dat[key][0]) is float:
214 raws_dat[key] = fixnan(raws_dat[key], 2)
216 # Simplify names
217 if fixnames:
218 var_mapping = {
219 'date_time': 'time', 'precip_accum': 'rain',
220 'fuel_moisture': 'fm', 'relative_humidity': 'rh',
221 'air_temp': 'temp', 'Ed': 'Ed', 'Ew': 'Ew'
223 old_keys = [*raws_dat.keys()]
224 old_keys = [k.replace("_set_1", "") for k in old_keys]
225 new_keys = []
226 for key in old_keys:
227 new_keys.append(var_mapping.get(key, key))
228 old_keys = [*raws_dat.keys()]
229 old_keys = [k.replace("_set_1", "") for k in old_keys]
230 new_keys = []
231 for key in old_keys:
232 new_keys.append(var_mapping.get(key, key))
233 raws_dat2 = dict(zip(new_keys, list(raws_dat.values())))
234 return raws_dat2
236 else: return raws_dat
238 def format_precip(precipa):
239 rain=np.array(precipa, dtype = 'float64')
240 rain = np.diff(rain) # first difference to convert accumulated to hourly
241 rain = np.insert(rain, 0, [np.NaN]) # add NaN entry to account for diff
242 rain[rain > 1000] = np.NaN # filter out erroneously high
243 rain[rain < 0] = np.NaN # filter out negative, results from diff function after precipa goes to zero
244 return rain
246 # fix isolated nans
247 def fixnan(a,n):
248 for c in range(n):
249 for i in np.where(np.isnan(a)):
250 a[i]=0.5*(a[i-1]+a[i+1])
251 if not any(np.isnan(a)):
252 break
253 return a
255 def retrieve_raws(mes, stid, raws_vars, time1, time2):
256 meso_ts = mes.timeseries(time1, time2,
257 stid=stid, vars=raws_vars)
258 station = meso_ts['STATION'][0]
260 raws_dat = format_raws(station)
262 return station, raws_dat
264 def raws_data(start=None, hours=None, h2=None, stid=None,meso_token=None):
265 # input:
266 # start YYYYMMDDhhmm
267 # hours legth of the period
268 # h2 (optional) length of the training period
269 # stid the station id
270 time_start=start
271 time_end = datetime.strptime(start, "%Y%m%d%H%M") + timedelta(hours = hours+1) # end time, plus a buffer to control for time shift
272 time_end = str(int(time_end.strftime("%Y%m%d%H%M")))
273 print('data_raws: Time Parameters:')
274 print('-'*50)
275 print('Time Start:', datetime.strptime(time_start, "%Y%m%d%H%M").strftime("%Y/%M/%d %H:%M"))
276 print('Time End:', datetime.strptime(time_end, "%Y%m%d%H%M").strftime("%Y/%M/%d %H:%M"))
277 print('Total Runtime:', hours, 'hours')
278 print('Training Time:', h2, 'hours')
279 print('-'*50)
280 raws_vars='air_temp,relative_humidity,precip_accum,fuel_moisture'
281 m=Meso(meso_token)
282 station, raws_dat = retrieve_raws(m, stid, raws_vars, time_start, time_end)
283 raws_dat['title']='RAWS data station ' + stid
284 raws_dat.update({'hours':hours,'h2':h2,'station':station})
285 print('Data Read:')
286 print('-'*50)
287 print('Station ID:', station['STID'])
288 print('Lat / Lon:', station['LATITUDE'],', ',station['LONGITUDE'])
289 if(station['QC_FLAGGED']): print('WARNING: station flagged for QC')
290 print('-'*50)
291 raws_dat.update({'hours':hours,'h2':h2})
292 return raws_dat
294 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~