fixing synthetic_data, mv check_data before
[notebooks.git] / fmda / data_funcs.py
blob203b1bb65352d3f8de2754a5c907da2c7b10f0e0
1 ## Set of Functions to process and format fuel moisture model inputs
2 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4 import numpy as np, random
5 from moisture_models import model_decay
7 # Helper Functions
8 verbose = False ## Must be declared in environment
9 def vprint(*args):
10 if verbose:
11 for s in args[:(len(args)-1)]:
12 print(s, end=' ')
13 print(args[-1])
15 def check_data_array(dat,h,a,s):
16 if a in dat:
17 ar = dat[a]
18 print("array %s %s length %i min %s max %s" % (a,s,len(ar),min(ar),max(ar)))
19 if len(ar) < h:
20 print('Warning: len(%a) < %i' % (a,ho))
21 exit(1)
22 else:
23 print('no array ' + a)
25 def check_data(dat,h2=None,hours=None):
26 if h2 is None:
27 h2 = dat['h2']
28 if hours is None:
29 hours = dat['hours']
30 check_data_array(dat,hours,'E','drying equilibrium (%)')
31 check_data_array(dat,hours,'Ed','drying equilibrium (%)')
32 check_data_array(dat,hours,'Ew','wetting equilibrium (%)')
33 check_data_array(dat,hours,'rain','rain intensity (mm/h)')
34 check_data_array(dat,hours,'fm','RAWS fuel moisture (%)')
36 # Function to simulate moisture data and equilibrium for model testing
37 def create_synthetic_data(days=20,power=4,data_noise=0.02,process_noise=0.0,DeltaE=0.0):
38 hours = days*24
39 h2 = int(hours/2)
40 hour = np.array(range(hours))
41 day = np.array(range(hours))/24.
43 # artificial equilibrium data
44 E = 100.0*np.power(np.sin(np.pi*day),4) # diurnal curve
45 E = 0.05+0.25*E
46 # FMC free run
47 m_f = np.zeros(hours)
48 m_f[0] = 0.1 # initial FMC
49 process_noise=0.
50 for t in range(hours-1):
51 m_f[t+1] = max(0.,model_decay(m_f[t],E[t]) + random.gauss(0,process_noise) )
52 data = m_f + np.random.normal(loc=0,scale=data_noise,size=hours)
53 E = E + DeltaE
54 return E,m_f,data,hour,h2,DeltaE
56 def synthetic_data(days=20,power=4,data_noise=0.02,process_noise=0.0,DeltaE=0.0,Emin=5,Emax=30):
57 hours = days*24
58 h2 = int(hours/2)
59 hour = np.array(range(hours))
60 day = np.array(range(hours))/24.
61 # artificial equilibrium data
62 E = np.power(np.sin(np.pi*day),power) # diurnal curve betwen 0 and 1
63 E = Emin+(Emax - Emin)*E
64 # FMC free run
65 m_f = np.zeros(hours)
66 m_f[0] = 0.1 # initial FMC
67 # process_noise=0.
68 for t in range(hours-1):
69 m_f[t+1] = max(0.,model_decay(m_f[t],E[t]) + random.gauss(0,process_noise) )
70 data = m_f + np.random.normal(loc=0,scale=data_noise,size=hours)
71 E = E + DeltaE
72 Ed=E+1.0
73 Ew=np.maximum(E-1.0,0)
74 dat = {'E':E,'Ew':Ew,'Ed':Ed,'m_f':m_f,'hours':hours,'h2':h2,'DeltaE':DeltaE}
75 check_data(dat)
76 return dat
80 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
82 ## RAWS Data Functions
84 def format_raws(stn, fixnames = True):
85 raws_dat = stn['OBSERVATIONS']
87 # Convert to Numpy arrays, check data type for floats
88 for key in [*stn['OBSERVATIONS'].keys()]:
89 if type(stn['OBSERVATIONS'][key][0]) is float:
90 raws_dat[key] = np.array(stn['OBSERVATIONS'][key], dtype = 'float64')
91 else:
92 raws_dat[key] = np.array(stn['OBSERVATIONS'][key])
94 # Transform Data
95 raws_dat['air_temp_set_1'] = raws_dat['air_temp_set_1'] + 273.15 ## convert C to K
96 if 'precip_accum_set_1' in raws_dat.keys():
97 raws_dat['precip_accum_set_1'] = format_precip(raws_dat['precip_accum_set_1']) ## format precip data, accumulated to hourly
100 # Calculate Equilibrium Temps
101 raws_dat['Ed'] = 0.924*raws_dat['relative_humidity_set_1']**0.679 + 0.000499*np.exp(0.1*raws_dat['relative_humidity_set_1']) + 0.18*(21.1 + 273.15 - raws_dat['air_temp_set_1'])*(1 - np.exp(-0.115*raws_dat['relative_humidity_set_1']))
102 raws_dat['Ew'] = 0.618*raws_dat['relative_humidity_set_1']**0.753 + 0.000454*np.exp(0.1*raws_dat['relative_humidity_set_1']) + 0.18*(21.1 + 273.15 - raws_dat['air_temp_set_1'])*(1 - np.exp(-0.115*raws_dat['relative_humidity_set_1']))
104 # Fix nan values
105 for key in [*raws_dat.keys()]:
106 if type(raws_dat[key][0]) is float:
107 raws_dat[key] = fixnan(raws_dat[key], 2)
109 # Simplify names
110 if fixnames:
111 var_mapping = {
112 'date_time': 'time', 'precip_accum': 'rain',
113 'fuel_moisture': 'fm', 'relative_humidity': 'rh',
114 'air_temp': 'temp', 'Ed': 'Ed', 'Ew': 'Ew'
116 old_keys = [*raws_dat.keys()]
117 old_keys = [k.replace("_set_1", "") for k in old_keys]
118 new_keys = []
119 for key in old_keys:
120 new_keys.append(var_mapping.get(key, key))
121 old_keys = [*raws_dat.keys()]
122 old_keys = [k.replace("_set_1", "") for k in old_keys]
123 new_keys = []
124 for key in old_keys:
125 new_keys.append(var_mapping.get(key, key))
126 raws_dat2 = dict(zip(new_keys, list(raws_dat.values())))
127 return raws_dat2
129 else: return raws_dat
131 def format_precip(precipa):
132 rain=np.array(precipa, dtype = 'float64')
133 rain = np.diff(rain) # first difference to convert accumulated to hourly
134 rain = np.insert(rain, 0, [np.NaN]) # add NaN entry to account for diff
135 rain[rain > 1000] = np.NaN # filter out erroneously high
136 rain[rain < 0] = np.NaN # filter out negative, results from diff function after precipa goes to zero
137 return rain
139 # fix isolated nans
140 def fixnan(a,n):
141 for c in range(n):
142 for i in np.where(np.isnan(a)):
143 a[i]=0.5*(a[i-1]+a[i+1])
144 if not any(np.isnan(a)):
145 break
146 return a
148 def retrieve_raws(mes, stid, raws_vars, time1, time2):
149 meso_ts = mes.timeseries(time1, time2,
150 stid=stid, vars=raws_vars)
151 station = meso_ts['STATION'][0]
153 raws_dat = format_raws(station)
155 return station, raws_dat
157 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~