Edit: synthetic_data function
[notebooks.git] / fmda / data_funcs.py
blob1db849c72ae2817ff83341b395726f3c1602ce4b
1 ## Set of Functions to process and format fuel moisture model inputs
2 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4 import numpy as np, random
5 from moisture_models import model_decay
7 # Helper Functions
8 verbose = False ## Must be declared in environment
9 def vprint(*args):
10 if verbose:
11 for s in args[:(len(args)-1)]:
12 print(s, end=' ')
13 print(args[-1])
15 # Function to simulate moisture data and equilibrium for model testing
16 def create_synthetic_data(days=20,power=4,data_noise=0.02,process_noise=0.0,DeltaE=0.0):
17 hours = days*24
18 h2 = int(hours/2)
19 hour = np.array(range(hours))
20 day = np.array(range(hours))/24.
22 # artificial equilibrium data
23 E = np.power(np.sin(np.pi*day),4) # diurnal curve
24 E = 0.05+0.25*E
25 # FMC free run
26 m_f = np.zeros(hours)
27 m_f[0] = 0.1 # initial FMC
28 process_noise=0.
29 for t in range(hours-1):
30 m_f[t+1] = max(0.,model_decay(m_f[t],E[t]) + random.gauss(0,process_noise) )
31 data = m_f + np.random.normal(loc=0,scale=data_noise,size=hours)
32 E = E + DeltaE
33 return E,m_f,data,hour,h2,DeltaE
35 def synthetic_data(days=20,power=4,data_noise=0.02,process_noise=0.0,DeltaE=0.0,Emin=0.5,Emax=0.3):
36 hours = days*24
37 h2 = int(hours/2)
38 hour = np.array(range(hours))
39 day = np.array(range(hours))/24.
40 # artificial equilibrium data
41 E = np.power(np.sin(np.pi*day),power) # diurnal curve
42 E = Emin+(Emax - Emin)*E
43 # FMC free run
44 m_f = np.zeros(hours)
45 m_f[0] = 0.1 # initial FMC
46 # process_noise=0.
47 for t in range(hours-1):
48 m_f[t+1] = max(0.,model_decay(m_f[t],E[t]) + random.gauss(0,process_noise) )
49 data = m_f + np.random.normal(loc=0,scale=data_noise,size=hours)
50 E = E + DeltaE
51 Ed=E+1.0
52 Ew=np.maximum(E-1.0,0)
53 return {'E':E,'Ew':Ew,'Ed':Ed,'m_f':m_f,'hour':hour,'h2':h2,'DeltaE':DeltaE}
57 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
59 ## RAWS Data Functions
61 def format_raws(stn, fixnames = True):
62 raws_dat = stn['OBSERVATIONS']
64 # Convert to Numpy arrays, check data type for floats
65 for key in [*stn['OBSERVATIONS'].keys()]:
66 if type(stn['OBSERVATIONS'][key][0]) is float:
67 raws_dat[key] = np.array(stn['OBSERVATIONS'][key], dtype = 'float64')
68 else:
69 raws_dat[key] = np.array(stn['OBSERVATIONS'][key])
71 # Transform Data
72 raws_dat['air_temp_set_1'] = raws_dat['air_temp_set_1'] + 273.15 ## convert C to K
73 if 'precip_accum_set_1' in raws_dat.keys():
74 raws_dat['precip_accum_set_1'] = format_precip(raws_dat['precip_accum_set_1']) ## format precip data, accumulated to hourly
77 # Calculate Equilibrium Temps
78 raws_dat['Ed'] = 0.924*raws_dat['relative_humidity_set_1']**0.679 + 0.000499*np.exp(0.1*raws_dat['relative_humidity_set_1']) + 0.18*(21.1 + 273.15 - raws_dat['air_temp_set_1'])*(1 - np.exp(-0.115*raws_dat['relative_humidity_set_1']))
79 raws_dat['Ew'] = 0.618*raws_dat['relative_humidity_set_1']**0.753 + 0.000454*np.exp(0.1*raws_dat['relative_humidity_set_1']) + 0.18*(21.1 + 273.15 - raws_dat['air_temp_set_1'])*(1 - np.exp(-0.115*raws_dat['relative_humidity_set_1']))
81 # Fix nan values
82 for key in [*raws_dat.keys()]:
83 if type(raws_dat[key][0]) is float:
84 raws_dat[key] = fixnan(raws_dat[key], 2)
86 # Simplify names
87 if fixnames:
88 var_mapping = {
89 'date_time': 'time', 'precip_accum': 'rain',
90 'fuel_moisture': 'fm', 'relative_humidity': 'rh',
91 'air_temp': 'temp', 'Ed': 'Ed', 'Ew': 'Ew'
93 old_keys = [*raws_dat.keys()]
94 old_keys = [k.replace("_set_1", "") for k in old_keys]
95 new_keys = []
96 for key in old_keys:
97 new_keys.append(var_mapping.get(key, key))
98 old_keys = [*raws_dat.keys()]
99 old_keys = [k.replace("_set_1", "") for k in old_keys]
100 new_keys = []
101 for key in old_keys:
102 new_keys.append(var_mapping.get(key, key))
103 raws_dat2 = dict(zip(new_keys, list(raws_dat.values())))
104 return raws_dat2
106 else: return raws_dat
108 def format_precip(precipa):
109 rain=np.array(precipa, dtype = 'float64')
110 rain = np.diff(rain) # first difference to convert accumulated to hourly
111 rain = np.insert(rain, 0, [np.NaN]) # add NaN entry to account for diff
112 rain[rain > 1000] = np.NaN # filter out erroneously high
113 rain[rain < 0] = np.NaN # filter out negative, results from diff function after precipa goes to zero
114 return rain
116 # fix isolated nans
117 def fixnan(a,n):
118 for c in range(n):
119 for i in np.where(np.isnan(a)):
120 a[i]=0.5*(a[i-1]+a[i+1])
121 if not any(np.isnan(a)):
122 break
123 return a
125 def retrieve_raws(mes, stid, raws_vars, time1, time2):
126 meso_ts = mes.timeseries(time1, time2,
127 stid=stid, vars=raws_vars)
128 station = meso_ts['STATION'][0]
130 raws_dat = format_raws(station)
132 return station, raws_dat
134 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
137 def check_data_array(dat,h,a,s):
138 try:
139 ar = dat[a]
140 except:
141 print('cannot find array ' + a)
142 exit(1)
143 print("array %s %s length %i min %s max %s\n" % (a,s,len(ar),min(ar),max(ar)))
144 if len(ar) < h:
145 print('Error: array length less than %i' % hours)
146 exit(1)
148 def check_data(dat,h2,hours):
149 check_data_array(dat,hours,'Ed','drying equilibrium (%)')
150 check_data_array(dat,hours,'Ew','wetting equilibrium (%)')
151 check_data_array(dat,hours,'rain','rain intensity (mm/h)')
152 check_data_array(dat,hours,'fm','RAWS fuel moisture (%)')