fmda/data_funcs.py

   1 ## Set of Functions to process and format fuel moisture model inputs
   2 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   3
   4 import numpy as np, random
   5 from moisture_models import model_decay
   6
   7 # Helper Functions
   8 verbose = False ## Must be declared in environment
   9 def vprint(*args):
  10     if verbose:
  11         for s in args[:(len(args)-1)]:
  12             print(s, end=' ')
  13         print(args[-1])
  14
  15 def check_data_array(dat,h,a,s):
  16     if a in dat:
  17         ar = dat[a]
  18         print("array %s %s length %i min %s max %s" % (a,s,len(ar),min(ar),max(ar)))
  19         if len(ar) < h:
  20             print('Warning: len(%a) < %i' % (a,ho))
  21             exit(1)
  22     else:
  23         print('no array ' + a)
  24
  25 def check_data(dat,h2=None,hours=None):
  26     if h2 is None:
  27         h2 = dat['h2']
  28     if hours is None:
  29         hours = dat['hours']
  30     check_data_array(dat,hours,'E','drying equilibrium (%)')
  31     check_data_array(dat,hours,'Ed','drying equilibrium (%)')
  32     check_data_array(dat,hours,'Ew','wetting equilibrium (%)')
  33     check_data_array(dat,hours,'rain','rain intensity (mm/h)')
  34     check_data_array(dat,hours,'fm','RAWS fuel moisture (%)')
  35
  36 # Function to simulate moisture data and equilibrium for model testing
  37 def create_synthetic_data(days=20,power=4,data_noise=0.02,process_noise=0.0,DeltaE=0.0):
  38     hours = days*24
  39     h2 = int(hours/2)
  40     hour = np.array(range(hours))
  41     day = np.array(range(hours))/24.
  42
  43     # artificial equilibrium data
  44     E = 100.0*np.power(np.sin(np.pi*day),4) # diurnal curve
  45     E = 0.05+0.25*E
  46     # FMC free run
  47     m_f = np.zeros(hours)
  48     m_f[0] = 0.1         # initial FMC
  49     process_noise=0.
  50     for t in range(hours-1):
  51         m_f[t+1] = max(0.,model_decay(m_f[t],E[t])  + random.gauss(0,process_noise) )
  52     data = m_f + np.random.normal(loc=0,scale=data_noise,size=hours)
  53     E = E + DeltaE
  54     return E,m_f,data,hour,h2,DeltaE
  55
  56 def synthetic_data(days=20,power=4,data_noise=0.02,process_noise=0.0,DeltaE=0.0,Emin=5,Emax=30):
  57     hours = days*24
  58     h2 = int(hours/2)
  59     hour = np.array(range(hours))
  60     day = np.array(range(hours))/24.
  61     # artificial equilibrium data
  62     E = np.power(np.sin(np.pi*day),power) # diurnal curve betwen 0 and 1
  63     E = Emin+(Emax - Emin)*E
  64     # FMC free run
  65     m_f = np.zeros(hours)
  66     m_f[0] = 0.1         # initial FMC
  67     # process_noise=0.
  68     for t in range(hours-1):
  69         m_f[t+1] = max(0.,model_decay(m_f[t],E[t])  + random.gauss(0,process_noise) )
  70     data = m_f + np.random.normal(loc=0,scale=data_noise,size=hours)
  71     E = E + DeltaE
  72     Ed=E+1.0
  73     Ew=np.maximum(E-1.0,0)
  74     dat = {'E':E,'Ew':Ew,'Ed':Ed,'m_f':m_f,'hours':hours,'h2':h2,'DeltaE':DeltaE}
  75     check_data(dat)
  76     return dat
  77
  78
  79
  80 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  81
  82 ## RAWS Data Functions
  83
  84 def format_raws(stn, fixnames = True):
  85     raws_dat = stn['OBSERVATIONS']
  86
  87     # Convert to Numpy arrays, check data type for floats
  88     for key in [*stn['OBSERVATIONS'].keys()]:
  89         if type(stn['OBSERVATIONS'][key][0]) is float:
  90             raws_dat[key] = np.array(stn['OBSERVATIONS'][key], dtype = 'float64')
  91         else:
  92             raws_dat[key] = np.array(stn['OBSERVATIONS'][key])
  93
  94     # Transform Data
  95     raws_dat['air_temp_set_1'] = raws_dat['air_temp_set_1'] + 273.15 ## convert C to K
  96     if 'precip_accum_set_1' in raws_dat.keys():
  97         raws_dat['precip_accum_set_1'] = format_precip(raws_dat['precip_accum_set_1']) ## format precip data, accumulated to hourly
  98
  99
 100     # Calculate Equilibrium Temps
 101     raws_dat['Ed'] = 0.924*raws_dat['relative_humidity_set_1']**0.679 + 0.000499*np.exp(0.1*raws_dat['relative_humidity_set_1']) + 0.18*(21.1 + 273.15 - raws_dat['air_temp_set_1'])*(1 - np.exp(-0.115*raws_dat['relative_humidity_set_1']))
 102     raws_dat['Ew'] = 0.618*raws_dat['relative_humidity_set_1']**0.753 + 0.000454*np.exp(0.1*raws_dat['relative_humidity_set_1']) + 0.18*(21.1 + 273.15 - raws_dat['air_temp_set_1'])*(1 - np.exp(-0.115*raws_dat['relative_humidity_set_1']))
 103
 104     # Fix nan values
 105     for key in [*raws_dat.keys()]:
 106         if type(raws_dat[key][0]) is float:
 107             raws_dat[key] = fixnan(raws_dat[key], 2)
 108
 109     # Simplify names
 110     if fixnames:
 111         var_mapping = {
 112             'date_time': 'time', 'precip_accum': 'rain',
 113             'fuel_moisture': 'fm', 'relative_humidity': 'rh',
 114             'air_temp': 'temp', 'Ed': 'Ed', 'Ew': 'Ew'
 115             }
 116         old_keys = [*raws_dat.keys()]
 117         old_keys = [k.replace("_set_1", "") for k in old_keys]
 118         new_keys = []
 119         for key in old_keys:
 120             new_keys.append(var_mapping.get(key, key))
 121         old_keys = [*raws_dat.keys()]
 122         old_keys = [k.replace("_set_1", "") for k in old_keys]
 123         new_keys = []
 124         for key in old_keys:
 125             new_keys.append(var_mapping.get(key, key))
 126         raws_dat2 = dict(zip(new_keys, list(raws_dat.values())))
 127         return raws_dat2
 128
 129     else: return raws_dat
 130
 131 def format_precip(precipa):
 132     rain=np.array(precipa, dtype = 'float64')
 133     rain = np.diff(rain) # first difference to convert accumulated to hourly
 134     rain = np.insert(rain, 0, [np.NaN]) # add NaN entry to account for diff
 135     rain[rain > 1000] = np.NaN # filter out erroneously high
 136     rain[rain < 0] = np.NaN # filter out negative, results from diff function after precipa goes to zero
 137     return rain
 138
 139 # fix isolated nans
 140 def fixnan(a,n):
 141     for c in range(n):
 142         for i in np.where(np.isnan(a)):
 143             a[i]=0.5*(a[i-1]+a[i+1])
 144         if not any(np.isnan(a)):
 145             break
 146     return a
 147
 148 def retrieve_raws(mes, stid, raws_vars, time1, time2):
 149     meso_ts = mes.timeseries(time1, time2,
 150                        stid=stid, vars=raws_vars)
 151     station = meso_ts['STATION'][0]
 152
 153     raws_dat = format_raws(station)
 154
 155     return station, raws_dat
 156
 157 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~