Update data_funcs.py
[notebooks.git] / fmda / read_and_clean_tutorial.ipynb
bloba7fccd65407dee1b2d5fdc88ec6c864a03b54e29
2  "cells": [
3   {
4    "cell_type": "code",
5    "execution_count": null,
6    "id": "9ddd1d89-abdb-4627-a0ca-23db006b62f4",
7    "metadata": {},
8    "outputs": [],
9    "source": [
10     "import logging\n",
11     "import os\n",
12     "import os.path as osp\n",
13     "import pickle\n",
14     "import subprocess\n",
15     "from urllib.parse import urlparse\n",
16     "\n",
17     "import matplotlib.pyplot as plt\n",
18     "import numpy as np\n",
19     "import yaml\n",
20     "\n",
21     "from utils import time_intp, str2time, filter_nan_values"
18    ]
19   },
20   {
21    "cell_type": "code",
22    "execution_count": null,
23    "id": "3c141ad1-b997-485f-a4a7-d0c1ee79eb09",
24    "metadata": {},
25    "outputs": [],
26    "source": []
27   },
28   {
29    "cell_type": "markdown",
30    "id": "609ea544-ed92-40a6-892b-1943e9f6f620",
31    "metadata": {},
32    "source": [
33     "## Setup"
34    ]
35   },
36   {
37    "cell_type": "code",
38    "execution_count": null,
39    "id": "8ef14e43-030f-422e-a3a6-e9802a3df3ac",
40    "metadata": {},
41    "outputs": [],
42    "source": [
43     "def retrieve_url(url, dest_path, force_download=False):\n",
44     "    \"\"\"Download `url` to `dest_path` with wget unless the target already exists.\n",
45     "\n",
46     "    Parameters\n",
47     "    ----------\n",
48     "    url : str\n",
49     "        Address of the file to download.\n",
50     "    dest_path : str\n",
51     "        Local path the download is written to.\n",
52     "    force_download : bool, optional\n",
53     "        When True, download even if `dest_path` already exists.\n",
54     "\n",
55     "    Raises\n",
56     "    ------\n",
57     "    RuntimeError\n",
58     "        If wget exits with a non-zero status.\n",
59     "    \"\"\"\n",
60     "    if osp.exists(dest_path) and not force_download:\n",
61     "        print(f\"Target data already exists at {dest_path}\")\n",
62     "        return\n",
63     "    target_extension = osp.splitext(dest_path)[1]\n",
64     "    url_extension = osp.splitext(urlparse(url).path)[1]\n",
65     "    if target_extension != url_extension:\n",
66     "        print(\"Warning: file extension from url does not match destination file extension\")\n",
67     "    # Pass an argument list (no shell) so spaces or shell metacharacters in\n",
68     "    # the url/path cannot split the command or inject another one.\n",
69     "    status = subprocess.call([\"wget\", \"-O\", dest_path, url])\n",
70     "    if status != 0:\n",
71     "        # wget -O truncates/creates the target up front, so a failed run can\n",
72     "        # leave an empty or partial file that the exists() check above would\n",
73     "        # accept as a finished download on the next call.\n",
74     "        if osp.exists(dest_path):\n",
75     "            os.remove(dest_path)\n",
76     "        raise RuntimeError(f\"wget exited with status {status} while downloading {url}\")\n",
77     "    assert osp.exists(dest_path)\n",
78     "    print(f\"Successfully downloaded {url} to {dest_path}\")"
54    ]
55   },
56   {
57    "cell_type": "code",
58    "execution_count": null,
59    "id": "41b0d403-7d6b-44f4-963f-8dc492ae0126",
60    "metadata": {},
61    "outputs": [],
62    "source": [
63     "retrieve_url(\"https://demo.openwfm.org/web/data/fmda/dicts/fmda_nw_202401-05_f05.pkl\", \"data/fmda_nw_202401-05_f05.pkl\")"
64    ]
65   },
66   {
67    "cell_type": "code",
68    "execution_count": null,
69    "id": "5a2972e2-e360-43b8-9706-01aee586096c",
70    "metadata": {},
71    "outputs": [],
72    "source": [
73     "def read_pkl(file_path):\n",
74     "    \"\"\"Load and return the pickled object stored at `file_path`.\n",
75     "\n",
76     "    The path is printed once the file has been opened.\n",
77     "    NOTE(review): unpickling can run arbitrary code - only load files from a trusted source.\n",
78     "    \"\"\"\n",
79     "    with open(file_path, 'rb') as handle:\n",
80     "        print(f\"loading file {file_path}\")\n",
81     "        return pickle.load(handle)"
78    ]
79   },
80   {
81    "cell_type": "code",
82    "execution_count": null,
83    "id": "14f03aa9-a39c-4547-8c8c-52e30acff136",
84    "metadata": {},
85    "outputs": [],
86    "source": [
87     "def read_yml(yaml_path, subkey=None):\n",
88     "    \"\"\"Parse the YAML file at `yaml_path` with yaml.safe_load.\n",
89     "\n",
90     "    Returns the whole parsed document, or only the entry under `subkey`\n",
91     "    when `subkey` is not None.\n",
92     "    \"\"\"\n",
93     "    with open(yaml_path, 'r') as handle:\n",
94     "        parsed = yaml.safe_load(handle)\n",
95     "    if subkey is None:\n",
96     "        return parsed\n",
97     "    return parsed[subkey]"
93    ]
94   },
95   {
96    "cell_type": "code",
97    "execution_count": null,
98    "id": "e69e37b9-73ef-45a1-9738-844f26dc3323",
99    "metadata": {},
100    "outputs": [],
101    "source": [
102     "data_params = read_yml(\"params_data.yaml\")\n",
103     "data_params"
104    ]
105   },
106   {
107    "cell_type": "code",
108    "execution_count": null,
109    "id": "6b5c3c82-84ba-426c-b8d9-f540b5026158",
110    "metadata": {},
111    "outputs": [],
112    "source": [
113     "dat = read_pkl(\"data/test_CA_202401.pkl\")"
114    ]
115   },
116   {
117    "cell_type": "code",
118    "execution_count": null,
119    "id": "5b2aeecb-89e6-41d0-af88-59a07b929edc",
120    "metadata": {},
121    "outputs": [],
122    "source": [
123     "dat['NV040_202401']['RAWS']['fm']"
124    ]
125   },
126   {
127    "cell_type": "markdown",
128    "id": "dae0e47b-02eb-4759-9b95-3cc1b281d41e",
129    "metadata": {},
130    "source": [
131     "## Filters"
132    ]
133   },
134   {
135    "cell_type": "code",
136    "execution_count": null,
137    "id": "8d6fbd18-adad-4370-add7-164275fc010d",
138    "metadata": {},
139    "outputs": [],
140    "source": [
141     "def filter_fmc(x, data_params=data_params):\n",
142     "    \"\"\"Overwrite out-of-range entries of `x` with NaN, modifying `x` in place.\n",
143     "\n",
144     "    An entry is rejected when it is <= data_params['min_fm'] or\n",
145     "    >= data_params['max_fm'].\n",
146     "\n",
147     "    Returns\n",
148     "    -------\n",
149     "    x : the input array itself, after masking\n",
150     "    na_vec : np.zeros_like(x) with 1 at every position that was set to NaN\n",
151     "    \"\"\"\n",
152     "    too_low = x <= data_params['min_fm']\n",
153     "    too_high = x >= data_params['max_fm']\n",
154     "    rejected = too_low | too_high\n",
155     "\n",
156     "    x[rejected] = np.nan\n",
157     "    na_vec = np.zeros_like(x)\n",
158     "    na_vec[rejected] = 1\n",
159     "    return x, na_vec"
156    ]
157   },
158   {
159    "cell_type": "code",
160    "execution_count": null,
161    "id": "e49e7951-228c-44ee-ae54-d941e9a350f2",
162    "metadata": {},
163    "outputs": [],
164    "source": [
165     "def filter_rain(r, data_params=data_params):\n",
166     "    \"\"\"Overwrite out-of-range entries of `r` with NaN, modifying `r` in place.\n",
167     "\n",
168     "    An entry is rejected when it is < data_params['min_rain'] or\n",
169     "    >= data_params['max_rain'].\n",
170     "\n",
171     "    Returns\n",
172     "    -------\n",
173     "    r : the input array itself, after masking\n",
174     "    na_vec : np.zeros_like(r) with 1 at every position that was set to NaN\n",
175     "    \"\"\"\n",
176     "    too_low = r < data_params['min_rain']\n",
177     "    too_high = r >= data_params['max_rain']\n",
178     "    rejected = too_low | too_high\n",
179     "\n",
180     "    r[rejected] = np.nan\n",
181     "    na_vec = np.zeros_like(r)\n",
182     "    na_vec[rejected] = 1\n",
183     "    return r, na_vec"
180    ]
181   },
182   {
183    "cell_type": "code",
184    "execution_count": null,
185    "id": "957c0a4f-874c-4cf7-ad73-45053c2fdecc",
186    "metadata": {},
187    "outputs": [],
188    "source": [
189     "def filter_wind(w, data_params=data_params):\n",
190     "    \"\"\"Overwrite out-of-range entries of `w` with NaN, modifying `w` in place.\n",
191     "\n",
192     "    An entry is rejected when it is < data_params['min_wind'] or\n",
193     "    >= data_params['max_wind'].\n",
194     "\n",
195     "    Returns\n",
196     "    -------\n",
197     "    w : the input array itself, after masking\n",
198     "    na_vec : np.zeros_like(w) with 1 at every position that was set to NaN\n",
199     "    \"\"\"\n",
200     "    too_low = w < data_params['min_wind']\n",
201     "    too_high = w >= data_params['max_wind']\n",
202     "    rejected = too_low | too_high\n",
203     "\n",
204     "    w[rejected] = np.nan\n",
205     "    na_vec = np.zeros_like(w)\n",
206     "    na_vec[rejected] = 1\n",
207     "    return w, na_vec"
204    ]
205   },
206   {
207    "cell_type": "code",
208    "execution_count": null,
209    "id": "7b6b4347-6abe-4c21-8318-06a766d67d21",
210    "metadata": {},
211    "outputs": [],
212    "source": [
213     "# Useful cases:\n",
214     "    # NV040_202401: more RAWS observations than HRRR times, interpolation should shorten the series\n",
215     "    # NV026_202401: RAWS observations every 10 min, interpolation should shorten the series\n",
216     "    # CGVC1_202401: only a few observations missing, interpolation should lengthen the series\n",
217     "    # YNWC1_202401: only 2 observations, should be filtered out entirely"
218    ]
219   },
220   {
221    "cell_type": "code",
222    "execution_count": null,
223    "id": "74d3b25a-6803-41c5-855b-92ed980d3fcb",
224    "metadata": {},
225    "outputs": [],
226    "source": [
227     "def time_intp(t1, v1, t2):\n",
228     "    \"\"\"Linearly interpolate the values `v1`, observed at times `t1`, onto times `t2`.\n",
229     "\n",
230     "    NOTE: this definition shadows the `time_intp` imported from utils at the top of the notebook.\n",
231     "\n",
232     "    Parameters\n",
233     "    ----------\n",
234     "    t1 : 1D array of objects with a .timestamp() method (e.g. datetime)\n",
235     "    v1 : 1D array of values, same length as t1\n",
236     "    t2 : 1D array of objects with a .timestamp() method\n",
237     "\n",
238     "    Returns\n",
239     "    -------\n",
240     "    1D numpy array with one interpolated value per entry of t2, or None\n",
241     "    (after logging an error) when the inputs fail validation.\n",
242     "    \"\"\"\n",
243     "    # All three inputs must be 1D arrays\n",
244     "    for name, arr in ((\"t1\", t1), (\"v1\", v1), (\"t2\", t2)):\n",
245     "        if arr.ndim != 1:\n",
246     "            logging.error(\"Error: %s is not a 1D array. Dimension: %s\", name, arr.ndim)\n",
247     "            return None\n",
248     "    # t1 and v1 must pair up element by element\n",
249     "    if len(t1) != len(v1):\n",
250     "        logging.error(\"Error: t1 and v1 have different lengths: %s %s\", len(t1), len(v1))\n",
251     "        return None\n",
252     "    # filter_nan_values comes from utils; presumably it drops the pairs whose value is NaN - confirm\n",
253     "    t1_no_nan, v1_no_nan = filter_nan_values(t1, v1)\n",
254     "    if len(t1_no_nan) == 0:\n",
255     "        logging.error(\"Error: no non-NaN values left to interpolate from\")\n",
256     "        return None\n",
257     "    # Convert datetime objects to timestamps\n",
258     "    t1_stamps = np.array([t.timestamp() for t in t1_no_nan])\n",
259     "    t2_stamps = np.array([t.timestamp() for t in t2])\n",
260     "    # np.interp expects increasing sample points; assumes t1 is sorted in time - confirm\n",
261     "    return np.interp(t2_stamps, t1_stamps, v1_no_nan)"
248    ]
249   },
250   {
251    "cell_type": "code",
252    "execution_count": null,
253    "id": "2e1a9438-3bfa-4c6d-8f4e-2888409ecb75",
254    "metadata": {},
255    "outputs": [],
256    "source": [
257     "len(dat.keys())"
258    ]
259   },
260   {
261    "cell_type": "code",
262    "execution_count": null,
263    "id": "f7e77394-dea3-4b24-82ed-6ce3c7dab21e",
264    "metadata": {},
265    "outputs": [],
266    "source": [
267     "def filter_nan_cases(d, data_params=data_params):\n",
268     "    \"\"\"Remove cases from `d` that have too few RAWS observations relative to the HRRR times.\n",
269     "\n",
270     "    For every key k, pct_na = 1 - n_fm / n_obs, where n_fm is the number of\n",
271     "    RAWS fm values and n_obs the number of HRRR times of that case. Keys with\n",
272     "    pct_na > data_params['max_pct_na'] are popped from `d` in place, to avoid\n",
273     "    over-interpolating. Returns None.\n",
274     "    \"\"\"\n",
275     "    doomed = []  # collect first; a dict cannot shrink while it is being iterated\n",
276     "    # Read from the argument `d`, not the notebook-level `dat`, so the function\n",
277     "    # filters whatever dictionary it is handed.\n",
278     "    for k in d:\n",
279     "        n_fm = d[k]['RAWS']['fm'].shape[0]\n",
280     "        n_obs = d[k]['HRRR']['time'].shape[0]\n",
281     "        pct_na = (1 - n_fm / n_obs)\n",
282     "        if pct_na > data_params['max_pct_na']:\n",
283     "            print(f\"Removing key {k} due to extensive missing data. Percent NA relative to HRRR: {np.round(pct_na, 3)}\")\n",
284     "            doomed.append(k)\n",
285     "    for k in doomed:\n",
286     "        d.pop(k, None)"
278    ]
279   },
280   {
281    "cell_type": "code",
282    "execution_count": null,
283    "id": "faffc4ea-1b96-4ab4-af44-e2555eba39e1",
284    "metadata": {},
285    "outputs": [],
286    "source": [
287     "filter_nan_cases(dat)"
288    ]
289   },
290   {
291    "cell_type": "code",
292    "execution_count": null,
293    "id": "ac4c695e-bcc6-4e53-b934-f3837dac70ca",
294    "metadata": {},
295    "outputs": [],
296    "source": [
297     "len(dat.keys())"
298    ]
299   },
300   {
301    "cell_type": "code",
302    "execution_count": null,
303    "id": "c3fb7817-82b4-49ee-8c62-ef7308648c02",
304    "metadata": {},
305    "outputs": [],
306    "source": []
307   },
308   {
309    "cell_type": "code",
310    "execution_count": null,
311    "id": "a9f0b53b-b291-4592-ab0a-d05a6d2f770c",
312    "metadata": {},
313    "outputs": [],
314    "source": []
315   },
316   {
317    "cell_type": "code",
318    "execution_count": null,
319    "id": "9b7af652-5836-480f-861c-09bacaf035ea",
320    "metadata": {},
321    "outputs": [],
322    "source": [
323     "d = dat['NV026_202401']"
324    ]
325   },
326   {
327    "cell_type": "code",
328    "execution_count": null,
329    "id": "c5d7d2d6-cbae-402c-8585-a48ceba8f431",
330    "metadata": {},
331    "outputs": [],
332    "source": [
333     "d['RAWS']['fm'].shape"
334    ]
335   },
336   {
337    "cell_type": "code",
338    "execution_count": null,
339    "id": "ceb44038-b166-4827-a9ab-f62f51971c39",
340    "metadata": {},
341    "outputs": [],
342    "source": [
343     "d['RAWS']['time_raws'].shape"
344    ]
345   },
346   {
347    "cell_type": "code",
348    "execution_count": null,
349    "id": "481bb0b6-b5de-4244-89bc-24b6073e5cd4",
350    "metadata": {},
351    "outputs": [],
352    "source": [
353     "d['HRRR']['f01']['Ed'].shape"
354    ]
355   },
356   {
357    "cell_type": "code",
358    "execution_count": null,
359    "id": "88d3d4b6-c482-4158-ae13-daf729c2a496",
360    "metadata": {},
361    "outputs": [],
362    "source": [
363     "d['HRRR']['time'].shape"
364    ]
365   },
366   {
367    "cell_type": "code",
368    "execution_count": null,
369    "id": "d834581e-eb4a-494f-894a-5c81ff92bf85",
370    "metadata": {},
371    "outputs": [],
372    "source": [
373     "# RAWS and HRRR time axes of one case, converted with .timestamp() (str2time is imported in the first cell)\n",
374     "d = dat['NV040_202401']\n",
375     "time_raws = str2time(d['RAWS']['time_raws'])\n",
376     "time_hrrr = str2time(d['HRRR']['time'])\n",
377     "\n",
378     "t1 = np.array([t.timestamp() for t in time_raws])\n",
379     "t2 = np.array([t.timestamp() for t in time_hrrr])"
380    ]
381   },
382   {
383    "cell_type": "code",
384    "execution_count": null,
385    "id": "266ac8f6-119b-4424-b162-9077ab76b8e4",
386    "metadata": {},
387    "outputs": [],
388    "source": [
389     "tnew = np.interp(t2, t1, t1)"
390    ]
391   },
392   {
393    "cell_type": "code",
394    "execution_count": null,
395    "id": "1950759a-a332-4054-aee1-b086fbf4c39c",
396    "metadata": {},
397    "outputs": [],
398    "source": [
399     "np.isin(tnew, t1).mean()"
400    ]
401   },
402   {
403    "cell_type": "code",
404    "execution_count": null,
405    "id": "8b93c8ae-cfbb-4344-beda-5d3f60ed8ed9",
406    "metadata": {},
407    "outputs": [],
408    "source": []
409   },
410   {
411    "cell_type": "code",
412    "execution_count": null,
413    "id": "1c9a2263-9ec2-4c7e-9c79-90f8209a4c87",
414    "metadata": {},
415    "outputs": [],
416    "source": []
417   },
418   {
419    "cell_type": "code",
420    "execution_count": null,
421    "id": "9e2a78e5-330a-49ce-9dc9-babcb580d0d4",
422    "metadata": {},
423    "outputs": [],
424    "source": [
425     "d = dat['LIB03_202401']"
426    ]
427   },
428   {
429    "cell_type": "code",
430    "execution_count": null,
431    "id": "08b4c474-4332-4b45-9110-d8a21432cdf9",
432    "metadata": {},
433    "outputs": [],
434    "source": [
435     "d['RAWS']['fm'].shape"
436    ]
437   },
438   {
439    "cell_type": "code",
440    "execution_count": null,
441    "id": "1c12c514-37db-4b13-a35d-bd28651c00b4",
442    "metadata": {},
443    "outputs": [],
444    "source": [
445     "d['RAWS']['time_raws'].shape"
446    ]
447   },
448   {
449    "cell_type": "code",
450    "execution_count": null,
451    "id": "9846b2ae-38a8-4f0b-96a6-1c21d0ecb616",
452    "metadata": {},
453    "outputs": [],
454    "source": [
455     "d['HRRR']['f01']['Ed'].shape"
456    ]
457   },
458   {
459    "cell_type": "code",
460    "execution_count": null,
461    "id": "614fe317-8233-4dbf-97c8-c2d8d8923fea",
462    "metadata": {},
463    "outputs": [],
464    "source": [
465     "d['HRRR']['time'].shape"
466    ]
467   },
468   {
469    "cell_type": "code",
470    "execution_count": null,
471    "id": "40be5a62-06ec-4623-9684-bf186c3bcd1d",
472    "metadata": {},
473    "outputs": [],
474    "source": []
475   },
476   {
477    "cell_type": "code",
478    "execution_count": null,
479    "id": "081e0202-2bc0-4bd8-b4ab-f5eec289b70c",
480    "metadata": {},
481    "outputs": [],
482    "source": []
483   },
484   {
485    "cell_type": "code",
486    "execution_count": null,
487    "id": "31a1d814-9028-4213-ad9d-fabf1e35b554",
488    "metadata": {},
489    "outputs": [],
490    "source": []
491   },
492   {
493    "cell_type": "code",
494    "execution_count": null,
495    "id": "42de9659-3725-4f92-b17c-455126e89c47",
496    "metadata": {},
497    "outputs": [],
498    "source": []
499   },
500   {
501    "cell_type": "code",
502    "execution_count": null,
503    "id": "37cb45e7-b46b-429b-a9be-6c3a35677a28",
504    "metadata": {},
505    "outputs": [],
506    "source": []
507   },
508   {
509    "cell_type": "code",
510    "execution_count": null,
511    "id": "3f007941-6682-4022-9b78-48c214152635",
512    "metadata": {},
513    "outputs": [],
514    "source": []
515   },
516   {
517    "cell_type": "code",
518    "execution_count": null,
519    "id": "20687996-8267-4e93-9e24-b9515259e256",
520    "metadata": {},
521    "outputs": [],
522    "source": []
523   },
524   {
525    "cell_type": "code",
526    "execution_count": null,
527    "id": "55ce3b0e-7135-43ff-86ab-379c50b349db",
528    "metadata": {},
529    "outputs": [],
530    "source": [
531     "time_raws = str2time(dat['NV040_202401']['RAWS']['time_raws'])\n",
532     "time_hrrr = str2time(dat['NV040_202401']['HRRR']['time'])\n",
533     "fm = dat['NV040_202401']['RAWS']['fm']\n",
534     "rain = dat['NV040_202401']['HRRR']['f01']['rain']"
535    ]
536   },
537   {
538    "cell_type": "code",
539    "execution_count": null,
540    "id": "cc4fd0b0-286b-4cd9-88c4-59e996348235",
541    "metadata": {},
542    "outputs": [],
543    "source": [
544     "len(time_raws) == len(fm)"
545    ]
546   },
547   {
548    "cell_type": "code",
549    "execution_count": null,
550    "id": "1680ed39-a021-4631-a764-40f354cf6a09",
551    "metadata": {},
552    "outputs": [],
553    "source": [
554     "filter_fmc(fm)"
555    ]
556   },
557   {
558    "cell_type": "code",
559    "execution_count": null,
560    "id": "8a3c405a-f976-4756-aaec-63998d58203d",
561    "metadata": {},
562    "outputs": [],
563    "source": [
564     "filter_rain(rain)"
565    ]
566   },
567   {
568    "cell_type": "code",
569    "execution_count": null,
570    "id": "5f49d9d4-2edf-434e-8812-c8c6ef3fbb2f",
571    "metadata": {},
572    "outputs": [],
573    "source": [
574     "filter_wind(dat['NV040_202401']['HRRR']['f01']['wind'])"
575    ]
576   },
577   {
578    "cell_type": "code",
579    "execution_count": null,
580    "id": "3d6a8fe7-1864-475a-a4f3-0fe04806ad50",
581    "metadata": {},
582    "outputs": [],
583    "source": [
584     "dat['NV040_202401']['RAWS']['wind']"
585    ]
586   },
587   {
588    "cell_type": "code",
589    "execution_count": null,
590    "id": "25ad1ce4-2060-4ece-932a-396813e56dc6",
591    "metadata": {},
592    "outputs": [],
593    "source": [
594     "dat['CNFC1_202401']['RAWS'].keys()"
595    ]
596   },
597   {
598    "cell_type": "code",
599    "execution_count": null,
600    "id": "a58ca663-074d-4eb4-a644-c9e05a415e65",
601    "metadata": {},
602    "outputs": [],
603    "source": [
604     "dat['CNFC1_202401']['RAWS']['solar']"
605    ]
606   },
607   {
608    "cell_type": "code",
609    "execution_count": null,
610    "id": "c2f27bfb-73fc-4bc6-9abc-ac1f5ab46acd",
611    "metadata": {},
612    "outputs": [],
613    "source": [
614     "dat['CNFC1_202401']['HRRR']['f01']['wind']"
615    ]
616   },
617   {
618    "cell_type": "code",
619    "execution_count": null,
620    "id": "74b98be4-55b1-4db2-81de-5f650aa00bb2",
621    "metadata": {},
622    "outputs": [],
623    "source": [
624     "dat['CNFC1_202401']['HRRR']['f01']['soilm']"
625    ]
626   },
627   {
628    "cell_type": "code",
629    "execution_count": null,
630    "id": "915f9534-ff84-4844-bde4-dce567d222c5",
631    "metadata": {},
632    "outputs": [],
633    "source": []
634   },
635   {
636    "cell_type": "code",
637    "execution_count": null,
638    "id": "cd2f357d-c813-4aa0-b3f7-cdc72f1f660c",
639    "metadata": {},
640    "outputs": [],
641    "source": [
642     "for k in dat:\n",
643     "    print(\"~\"*50)\n",
644     "    print(k)\n",
645     "    print(f\"HRRR Shape: {dat[k]['HRRR']['f01']['Ew'].shape}\")\n",
646     "    print(f\"RAWS Shape: {dat[k]['RAWS']['fm'].shape}\")"
647    ]
648   },
649   {
650    "cell_type": "code",
651    "execution_count": null,
652    "id": "1ab4cb8b-1aa8-4541-a5cc-041c493d8894",
653    "metadata": {},
654    "outputs": [],
655    "source": [
656     "dat['LIB03_202401']['RAWS']['soil_moisture']"
657    ]
658   },
659   {
660    "cell_type": "code",
661    "execution_count": null,
662    "id": "f62a897e-eeb8-4951-b64f-8b6ee11bc92a",
663    "metadata": {},
664    "outputs": [],
665    "source": [
666     "dat['LIB03_202401']['HRRR']['f01']['soilm']"
667    ]
668   },
669   {
670    "cell_type": "code",
671    "execution_count": null,
672    "id": "5037b7f3-a749-4b27-9e4c-f6c8d4fb8b6d",
673    "metadata": {},
674    "outputs": [],
675    "source": [
676     "# Run the range filters over the RAWS observations of every case and print the\n",
677     "# mean of each filter's indicator vector (the filters modify the arrays in `dat` in place)\n",
678     "for k in dat:\n",
679     "    print(\"~\"*50)\n",
680     "    print(k)\n",
681     "    # Filter RAWS\n",
682     "    fm, filter_vec = filter_fmc(dat[k][\"RAWS\"]['fm'])\n",
683     "    print(f\"Percent FMC Observations Filtered: {np.mean(filter_vec)}\")\n",
684     "    if 'rain' in dat[k][\"RAWS\"]:\n",
685     "        rain, filter_vec = filter_rain(dat[k][\"RAWS\"]['rain'])\n",
686     "        print(f\"Percent Rain Observations Filtered: {np.mean(filter_vec)}\")\n",
687     "    if 'wind' in dat[k][\"RAWS\"]:\n",
688     "        # Wind uses its own thresholds (min_wind/max_wind), not the rain ones\n",
689     "        wind, filter_vec = filter_wind(dat[k][\"RAWS\"]['wind'])\n",
690     "        print(f\"Percent Wind Observations Filtered: {np.mean(filter_vec)}\")\n",
691     "    # Filter HRRR unnecessary? "
689    ]
690   },
691   {
692    "cell_type": "code",
693    "execution_count": null,
694    "id": "abd5985c-c727-44fb-a1d6-b212dc24d3ac",
695    "metadata": {},
696    "outputs": [],
697    "source": []
698   },
699   {
700    "cell_type": "code",
701    "execution_count": null,
702    "id": "3e6849f7-e983-446e-9d1b-26a1b4c693fd",
703    "metadata": {},
704    "outputs": [],
705    "source": []
706   },
707   {
708    "cell_type": "code",
709    "execution_count": null,
710    "id": "3b307ae1-980d-487a-aad4-f2fc0a87cbcf",
711    "metadata": {},
712    "outputs": [],
713    "source": []
714   },
715   {
716    "cell_type": "code",
717    "execution_count": null,
718    "id": "6d7b7335-589e-49bd-88bd-20b7b2a40611",
719    "metadata": {},
720    "outputs": [],
721    "source": []
722   },
723   {
724    "cell_type": "code",
725    "execution_count": null,
726    "id": "982de3ce-e883-4eab-88dd-41295da16e7f",
727    "metadata": {},
728    "outputs": [],
729    "source": []
730   },
731   {
732    "cell_type": "code",
733    "execution_count": null,
734    "id": "940e36a7-1597-4bac-8a82-3ecb602e124c",
735    "metadata": {},
736    "outputs": [],
737    "source": []
738   },
739   {
740    "cell_type": "code",
741    "execution_count": null,
742    "id": "b34546f9-5c28-4c29-9148-0473fa9e00c1",
743    "metadata": {},
744    "outputs": [],
745    "source": []
746   },
747   {
748    "cell_type": "code",
749    "execution_count": null,
750    "id": "81a421c8-ed47-40f2-83c2-d258bce764f4",
751    "metadata": {},
752    "outputs": [],
753    "source": []
754   },
755   {
756    "cell_type": "code",
757    "execution_count": null,
758    "id": "897f84d1-4ac8-478a-82f1-ed59052b0199",
759    "metadata": {},
760    "outputs": [],
761    "source": [
762     "def foo(file_path=\"data/test_CA_202401.pkl\"):\n",
763     "    \"\"\"Draft of the end-to-end pipeline: read the pickle, format it, then filter it.\n",
764     "\n",
765     "    file_path : path of the pickle handed to read_pkl; defaults to the test\n",
766     "        file this notebook loads into `dat`.\n",
767     "    Returns the formatted and filtered dictionary.\n",
768     "    \"\"\"\n",
769     "    d = read_pkl(file_path)\n",
770     "    dat = format_fmda_data(d)\n",
771     "    # NOTE(review): filter_data is not defined in the visible notebook cells - confirm where it should come from\n",
772     "    dat = filter_data(dat)\n",
773     "    return dat"
767    ]
768   },
769   {
770    "cell_type": "code",
771    "execution_count": null,
772    "id": "ed4112af-2b76-4b57-8d2e-1e5e0cba3b42",
773    "metadata": {},
774    "outputs": [],
775    "source": [
776     "def format_fmda_data(d):\n",
777     "    \"\"\"Formatting step for an fmda dictionary; currently returns `d` unchanged.\n",
778     "\n",
779     "    Inputs:\n",
780     "        d: (dict) fmda dictionary, output of process in wrfxpy\n",
781     "    \"\"\"\n",
782     "    return d"
781    ]
782   },
783   {
784    "cell_type": "code",
785    "execution_count": null,
786    "id": "b0ebb758-d1ec-477f-b6af-207dbe2a01d1",
787    "metadata": {},
788    "outputs": [],
789    "source": []
790   },
791   {
792    "cell_type": "code",
793    "execution_count": null,
794    "id": "e4d704f9-cb10-4b3a-a65d-cf1a26ddb9d4",
795    "metadata": {},
796    "outputs": [],
797    "source": []
798   },
799   {
800    "cell_type": "code",
801    "execution_count": null,
802    "id": "c5972b2b-eab2-4705-8401-6aa746203ac1",
803    "metadata": {},
804    "outputs": [],
805    "source": []
806   },
807   {
808    "cell_type": "code",
809    "execution_count": null,
810    "id": "0818c399-be45-4520-9b9e-d847a73ea1bf",
811    "metadata": {},
812    "outputs": [],
813    "source": []
814   },
815   {
816    "cell_type": "code",
817    "execution_count": null,
818    "id": "94ad78d5-47a0-4c9e-abe8-9f3cfc67208f",
819    "metadata": {},
820    "outputs": [],
821    "source": []
822   }
823  ],
824  "metadata": {
825   "kernelspec": {
826    "display_name": "Python 3 (ipykernel)",
827    "language": "python",
828    "name": "python3"
829   },
830   "language_info": {
831    "codemirror_mode": {
832     "name": "ipython",
833     "version": 3
834    },
835    "file_extension": ".py",
836    "mimetype": "text/x-python",
837    "name": "python",
838    "nbconvert_exporter": "python",
839    "pygments_lexer": "ipython3",
840    "version": "3.10.13"
841   }
842  },
843  "nbformat": 4,
844  "nbformat_minor": 5