in model.fit verbose 1 -> 2
[notebooks.git] / fmda / data_funcs.py
blob06817b12a3825077f8cb9f4cf2cfcd79f98a8bf9
1 ## Set of Functions to process and format fuel moisture model inputs
2 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4 import numpy as np, random
5 from numpy.random import rand
6 from MesoPy import Meso
8 import matplotlib.pyplot as plt
9 from moisture_models import model_decay, model_moisture
10 from datetime import datetime, timedelta
11 import json
12 from utils import hash2
14 def to_json(dic,filename):
15 print('writing ',filename)
16 check_data(dic)
17 new={}
18 for i in dic:
19 if type(dic[i]) is np.ndarray:
20 new[i]=dic[i].tolist() # because numpy.ndarray is not serializable
21 else:
22 new[i]=dic[i]
23 # print('i',type(new[i]))
24 new['filename']=filename
25 print('Hash: ', hash2(new))
26 json.dump(new,open(filename,'w'),indent=4)
28 def from_json(filename):
29 print('reading ',filename)
30 dic=json.load(open(filename,'r'))
31 new={}
32 for i in dic:
33 if type(dic[i]) is list:
34 new[i]=np.array(dic[i]) # because ndarray is not serializable
35 else:
36 new[i]=dic[i]
37 check_data(new)
38 print('Hash: ', hash2(new))
39 return new
41 # Function to simulate moisture data and equilibrium for model testing
42 def create_synthetic_data(days=20,power=4,data_noise=0.02,process_noise=0.0,DeltaE=0.0):
43 hours = days*24
44 h2 = int(hours/2)
45 hour = np.array(range(hours))
46 day = np.array(range(hours))/24.
48 # artificial equilibrium data
49 E = 100.0*np.power(np.sin(np.pi*day),4) # diurnal curve
50 E = 0.05+0.25*E
51 # FMC free run
52 m_f = np.zeros(hours)
53 m_f[0] = 0.1 # initial FMC
54 process_noise=0.
55 for t in range(hours-1):
56 m_f[t+1] = max(0.,model_decay(m_f[t],E[t]) + random.gauss(0,process_noise) )
57 data = m_f + np.random.normal(loc=0,scale=data_noise,size=hours)
58 E = E + DeltaE
59 return E,m_f,data,hour,h2,DeltaE
61 # the following input or output dictionary with all model data and variables
63 def check_data_array(dat,hours,a,s):
64 if a in dat:
65 ar = dat[a]
66 print("array %s %s length %i min %s max %s %s" % (a,s,len(ar),min(ar),max(ar),type(ar)))
67 if hours is not None:
68 if len(ar) < hours:
69 print('len(%a) = %i does not equal to hours = %i' % (a,len(ar),hours))
70 exit(1)
71 else:
72 print(a + ' not present')
74 def check_data_scalar(dat,a):
75 if a in dat:
76 print('%s = %s' % (a,dat[a]),' ',type(dat[a]))
77 else:
78 print(a + ' not present' )
80 def check_data(dat):
81 check_data_scalar(dat,'filename')
82 check_data_scalar(dat,'title')
83 check_data_scalar(dat,'note')
84 check_data_scalar(dat,'hours')
85 check_data_scalar(dat,'h2')
86 if 'hours' in dat:
87 hours = dat['hours']
88 else:
89 hours = None
90 check_data_array(dat,hours,'E','drying equilibrium (%)')
91 check_data_array(dat,hours,'Ed','drying equilibrium (%)')
92 check_data_array(dat,hours,'Ew','wetting equilibrium (%)')
93 check_data_array(dat,hours,'Ec','equilibrium equilibrium (%)')
94 check_data_array(dat,hours,'rain','rain intensity (mm/h)')
95 check_data_array(dat,hours,'fm','RAWS fuel moisture data (%)')
96 check_data_array(dat,hours,'m','fuel moisture estimate (%)')
98 def synthetic_data(days=20,power=4,data_noise=0.02,process_noise=0.0,
99 DeltaE=0.0,Emin=5,Emax=30,p_rain=0.01,max_rain=10.0):
100 hours = days*24
101 h2 = int(hours/2)
102 hour = np.array(range(hours))
103 day = np.array(range(hours))/24.
104 # artificial equilibrium data
105 E = np.power(np.sin(np.pi*day),power) # diurnal curve betwen 0 and 1
106 E = Emin+(Emax - Emin)*E
107 E = E + DeltaE
108 Ed=E+0.5
109 Ew=np.maximum(E-0.5,0)
110 rain = np.multiply(rand(hours) < p_rain, rand(hours)*max_rain)
111 # FMC free run
112 m_f = np.zeros(hours)
113 m_f[0] = 0.1 # initial FMC
114 # process_noise=0.
115 for t in range(hours-1):
116 m_f[t+1] = max(0.,model_moisture(m_f[t],Ed[t-1],Ew[t-1],rain[t-1]) + random.gauss(0,process_noise))
117 m_f = m_f + np.random.normal(loc=0,scale=data_noise,size=hours)
118 dat = {'E':E,'Ew':Ew,'Ed':Ed,'m_f':m_f,'hours':hours,'h2':h2,'DeltaE':DeltaE,'rain':rain,'title':'Synthetic data'}
120 return dat
122 def plot_one(hmin,hmax,dat,name,linestyle,c,label,type='plot'):
123 # helper for plot_data
124 if name in dat:
125 h = len(dat[name])
126 if hmin is None:
127 hmin=0
128 if hmax is None:
129 hmax=len(dat[name])
130 hour = np.array(range(hmin,hmax))
131 if type=='plot':
132 plt.plot(hour,dat[name][hmin:hmax],linestyle=linestyle,c=c,label=label)
133 elif type=='scatter':
134 plt.scatter(hour,dat[name][hmin:hmax],linestyle=linestyle,c=c,label=label)
136 def plot_data(dat,title=None,title2=None,hmin=None,hmax=None):
137 if 'hours' in dat:
138 if hmax is None:
139 hmax = dat['hours']
140 else:
141 hmax = min(hmax, dat['hours'])
142 plt.figure(figsize=(16,4))
143 plot_one(hmin,hmax,dat,'E',linestyle='--',c='r',label='equilibrium')
144 plot_one(hmin,hmax,dat,'Ed',linestyle='--',c='r',label='drying equilibrium')
145 plot_one(hmin,hmax,dat,'Ew',linestyle='--',c='b',label='wetting equilibrium')
146 plot_one(hmin,hmax,dat,'m_f',linestyle='-',c='g',label='FMC truth')
147 plot_one(hmin,hmax,dat,'fm',linestyle=':',c='b',label='FMC observation')
148 plot_one(hmin,hmax,dat,'m',linestyle='-',c='k',label='FMC estimate')
149 plot_one(hmin,hmax,dat,'Ec',linestyle='-',c='g',label='equilibrium correction')
150 plot_one(hmin,hmax,dat,'rain',linestyle='-',c='b',label='rain intensity')
151 if title is not None:
152 t = title
153 else:
154 t=dat['title']
155 # print('title',type(t),t)
156 if title2 is not None:
157 t = t + ' ' + title2
158 plt.title(t)
159 plt.xlabel('Time (hours)')
160 if 'rain' in dat:
161 plt.ylabel('FMC (%) / Rain mm/h')
162 else:
163 plt.ylabel('Fuel moisture content (%)')
164 plt.legend()
166 # Calculate mean squared error
167 def mse(a, b):
168 return ((a - b)**2).mean()
170 # Calculate mean absolute error
171 def mape(a, b):
172 return ((a - b).__abs__()).mean()
174 def mse_data(dat):
175 h2 = dat['h2']
176 hours = dat['hours']
177 m = dat['m']
178 fm = dat['fm']
179 print('Training MSE: ' + str(np.round(mse(m[:h2], fm[:h2]), 4)))
180 print('Prediction MSE: ' + str(np.round(mse(m[h2:hours], fm[h2:hours]), 4)))
182 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
184 ## RAWS Data Functions
186 def format_raws(stn, fixnames = True):
187 raws_dat = stn['OBSERVATIONS']
189 # Convert to Numpy arrays, check data type for floats
190 for key in [*stn['OBSERVATIONS'].keys()]:
191 if type(stn['OBSERVATIONS'][key][0]) is float:
192 raws_dat[key] = np.array(stn['OBSERVATIONS'][key], dtype = 'float64')
193 else:
194 raws_dat[key] = np.array(stn['OBSERVATIONS'][key])
196 # Transform Data
197 raws_dat['air_temp_set_1'] = raws_dat['air_temp_set_1'] + 273.15 ## convert C to K
198 if 'precip_accum_set_1' in raws_dat.keys():
199 raws_dat['precip_accum_set_1'] = format_precip(raws_dat['precip_accum_set_1']) ## format precip data, accumulated to hourly
202 # Calculate Equilibrium Temps
203 raws_dat['Ed'] = 0.924*raws_dat['relative_humidity_set_1']**0.679 + 0.000499*np.exp(0.1*raws_dat['relative_humidity_set_1']) + 0.18*(21.1 + 273.15 - raws_dat['air_temp_set_1'])*(1 - np.exp(-0.115*raws_dat['relative_humidity_set_1']))
204 raws_dat['Ew'] = 0.618*raws_dat['relative_humidity_set_1']**0.753 + 0.000454*np.exp(0.1*raws_dat['relative_humidity_set_1']) + 0.18*(21.1 + 273.15 - raws_dat['air_temp_set_1'])*(1 - np.exp(-0.115*raws_dat['relative_humidity_set_1']))
206 # Fix nan values
207 for key in [*raws_dat.keys()]:
208 if type(raws_dat[key][0]) is float:
209 raws_dat[key] = fixnan(raws_dat[key], 2)
211 # Simplify names
212 if fixnames:
213 var_mapping = {
214 'date_time': 'time', 'precip_accum': 'rain',
215 'fuel_moisture': 'fm', 'relative_humidity': 'rh',
216 'air_temp': 'temp', 'Ed': 'Ed', 'Ew': 'Ew'
218 old_keys = [*raws_dat.keys()]
219 old_keys = [k.replace("_set_1", "") for k in old_keys]
220 new_keys = []
221 for key in old_keys:
222 new_keys.append(var_mapping.get(key, key))
223 old_keys = [*raws_dat.keys()]
224 old_keys = [k.replace("_set_1", "") for k in old_keys]
225 new_keys = []
226 for key in old_keys:
227 new_keys.append(var_mapping.get(key, key))
228 raws_dat2 = dict(zip(new_keys, list(raws_dat.values())))
229 return raws_dat2
231 else: return raws_dat
233 def format_precip(precipa):
234 rain=np.array(precipa, dtype = 'float64')
235 rain = np.diff(rain) # first difference to convert accumulated to hourly
236 rain = np.insert(rain, 0, [np.NaN]) # add NaN entry to account for diff
237 rain[rain > 1000] = np.NaN # filter out erroneously high
238 rain[rain < 0] = np.NaN # filter out negative, results from diff function after precipa goes to zero
239 return rain
241 # fix isolated nans
242 def fixnan(a,n):
243 for c in range(n):
244 for i in np.where(np.isnan(a)):
245 a[i]=0.5*(a[i-1]+a[i+1])
246 if not any(np.isnan(a)):
247 break
248 return a
250 def retrieve_raws(mes, stid, raws_vars, time1, time2):
251 meso_ts = mes.timeseries(time1, time2,
252 stid=stid, vars=raws_vars)
253 station = meso_ts['STATION'][0]
255 raws_dat = format_raws(station)
257 return station, raws_dat
259 def raws_data(start=None, hours=None, h2=None, stid=None,meso_token=None):
260 # input:
261 # start YYYYMMDDhhmm
262 # hours legth of the period
263 # h2 (optional) length of the training period
264 # stid the station id
265 time_start=start
266 time_end = datetime.strptime(start, "%Y%m%d%H%M") + timedelta(hours = hours+1) # end time, plus a buffer to control for time shift
267 time_end = str(int(time_end.strftime("%Y%m%d%H%M")))
268 print('data_raws: Time Parameters:')
269 print('-'*50)
270 print('Time Start:', datetime.strptime(time_start, "%Y%m%d%H%M").strftime("%Y/%M/%d %H:%M"))
271 print('Time End:', datetime.strptime(time_end, "%Y%m%d%H%M").strftime("%Y/%M/%d %H:%M"))
272 print('Total Runtime:', hours, 'hours')
273 print('Training Time:', h2, 'hours')
274 print('-'*50)
275 raws_vars='air_temp,relative_humidity,precip_accum,fuel_moisture'
276 m=Meso(meso_token)
277 station, raws_dat = retrieve_raws(m, stid, raws_vars, time_start, time_end)
278 raws_dat['title']='RAWS data station ' + stid
279 raws_dat.update({'hours':hours,'h2':h2,'station':station})
280 print('Data Read:')
281 print('-'*50)
282 print('Station ID:', station['STID'])
283 print('Lat / Lon:', station['LATITUDE'],', ',station['LONGITUDE'])
284 if(station['QC_FLAGGED']): print('WARNING: station flagged for QC')
285 print('-'*50)
286 raws_dat.update({'hours':hours,'h2':h2})
287 return raws_dat
289 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~