5 "execution_count": null,
6 "id": "9ddd1d89-abdb-4627-a0ca-23db006b62f4",
12 "import os.path as osp\n",
13 "import subprocess\n",
14 "from urllib.parse import urlparse\n",
15 "import numpy as np\n",
16 "import matplotlib.pyplot as plt\n",
17 "from utils import time_intp, str2time, filter_nan_values, read_pkl, read_yml"
22 "execution_count": null,
23 "id": "3c141ad1-b997-485f-a4a7-d0c1ee79eb09",
29 "cell_type": "markdown",
30 "id": "609ea544-ed92-40a6-892b-1943e9f6f620",
38 "execution_count": null,
39 "id": "41b0d403-7d6b-44f4-963f-8dc492ae0126",
43 "retrieve_url(\"https://demo.openwfm.org/web/data/fmda/dicts/fmda_nw_202401-05_f05.pkl\", \"data/fmda_nw_202401-05_f05.pkl\")"
48 "execution_count": null,
49 "id": "5a2972e2-e360-43b8-9706-01aee586096c",
56 "execution_count": null,
57 "id": "e69e37b9-73ef-45a1-9738-844f26dc3323",
61 "data_params = read_yml(\"params_data.yaml\")\n",
67 "execution_count": null,
68 "id": "6b5c3c82-84ba-426c-b8d9-f540b5026158",
72 "dat = read_pkl(\"data/test_CA_202401.pkl\")"
77 "execution_count": null,
78 "id": "5b2aeecb-89e6-41d0-af88-59a07b929edc",
82 "dat['NV040_202401']['RAWS']['fm']"
86 "cell_type": "markdown",
87 "id": "dae0e47b-02eb-4759-9b95-3cc1b281d41e",
95 "execution_count": null,
96 "id": "8d6fbd18-adad-4370-add7-164275fc010d",
100 "def filter_fmc(x, data_params=data_params):\n",
101 " # Vector to track which values of input x are set to na\n",
102 " na_vec = np.zeros_like(x)\n",
104 " # Filter extreme lows\n",
105 " condition = x<= data_params['min_fm']\n",
106 " x[condition] = np.nan\n",
107 " na_vec[np.where(condition)] = 1\n",
109 " # Filter extreme highs\n",
110 " condition = x>= data_params['max_fm']\n",
111 " x[condition] = np.nan\n",
112 " na_vec[condition] = 1\n",
119 "execution_count": null,
120 "id": "e49e7951-228c-44ee-ae54-d941e9a350f2",
124 "def filter_rain(r, data_params=data_params):\n",
125 " # Vector to track which values of input r are set to na\n",
126 " na_vec = np.zeros_like(r)\n",
128 " # Filter extreme lows\n",
129 " condition = r< data_params['min_rain']\n",
130 " r[condition] = np.nan\n",
131 " na_vec[np.where(condition)] = 1\n",
133 " # Filter extreme highs\n",
134 " condition = r>= data_params['max_rain']\n",
135 " r[condition] = np.nan\n",
136 " na_vec[condition] = 1 \n",
143 "execution_count": null,
144 "id": "957c0a4f-874c-4cf7-ad73-45053c2fdecc",
148 "def filter_wind(w, data_params=data_params):\n",
149 " # Vector to track which values of input w are set to na\n",
150 " na_vec = np.zeros_like(w)\n",
152 " # Filter extreme lows\n",
153 " condition = w< data_params['min_wind']\n",
154 " w[condition] = np.nan\n",
155 " na_vec[np.where(condition)] = 1\n",
157 " # Filter extreme highs\n",
158 " condition = w>= data_params['max_wind']\n",
159 " w[condition] = np.nan\n",
160 " na_vec[condition] = 1 \n",
167 "execution_count": null,
168 "id": "7b6b4347-6abe-4c21-8318-06a766d67d21",
173 " # NV040_202401: more raws observations than HRRR, interp should shorten\n",
174 " # NV026_202401: raws 10min obs, interp should shorten\n",
175 " # CGVC1_202401: missing only a few observations, interp should lengthen\n",
176 " # YNWC1_202401: only 2 observations, should be filtered entirely"
181 "execution_count": null,
182 "id": "74d3b25a-6803-41c5-855b-92ed980d3fcb",
186 "def time_intp(t1, v1, t2):\n",
187 " # Check if t1 v1 t2 are 1D arrays\n",
188 " if t1.ndim != 1:\n",
189 " logging.error(\"Error: t1 is not a 1D array. Dimension: %s\", t1.ndim)\n",
191 " if v1.ndim != 1:\n",
192 " logging.error(\"Error: v1 is not a 1D array. Dimension: %s\", v1.ndim)\n",
194 " if t2.ndim != 1:\n",
195 " logging.error(\"Error: t2 is not a 1D array. Dimension: %s\", t2.ndim)\n",
197 " # Check if t1 and v1 have the same length\n",
198 " if len(t1) != len(v1):\n",
199 " logging.error(\"Error: t1 and v1 have different lengths: %s %s\",len(t1),len(v1))\n",
201 " t1_no_nan, v1_no_nan = filter_nan_values(t1, v1)\n",
202 " # print('t1_no_nan.dtype=',t1_no_nan.dtype)\n",
203 " # Convert datetime objects to timestamps\n",
204 " t1_stamps = np.array([t.timestamp() for t in t1_no_nan])\n",
205 " t2_stamps = np.array([t.timestamp() for t in t2])\n",
211 "execution_count": null,
212 "id": "2e1a9438-3bfa-4c6d-8f4e-2888409ecb75",
221 "execution_count": null,
222 "id": "f7e77394-dea3-4b24-82ed-6ce3c7dab21e",
226 "def filter_nan_cases(d, data_params=data_params):\n",
227 " # Remove cases with too many NAN relative to interp target. Use to avoid over-interpolating\n",
228 " ks = [] # accumulate keys to remove\n",
230 " n_fm = dat[k]['RAWS']['fm'].shape[0]\n",
231 " n_obs = dat[k]['HRRR']['time'].shape[0]\n",
232 " pct_na = (1 - n_fm / n_obs)\n",
233 " if pct_na > data_params['max_pct_na']:\n",
234 " print(f\"Removing key {k} due to extensive missing data. Percent NA relative to HRRR: {np.round(pct_na, 3)}\")\n",
236 " for k in ks: d.pop(k, None)"
241 "execution_count": null,
242 "id": "faffc4ea-1b96-4ab4-af44-e2555eba39e1",
246 "filter_nan_cases(dat)"
251 "execution_count": null,
252 "id": "ac4c695e-bcc6-4e53-b934-f3837dac70ca",
261 "execution_count": null,
262 "id": "c3fb7817-82b4-49ee-8c62-ef7308648c02",
269 "execution_count": null,
270 "id": "a9f0b53b-b291-4592-ab0a-d05a6d2f770c",
277 "execution_count": null,
278 "id": "9b7af652-5836-480f-861c-09bacaf035ea",
282 "d = dat['NV026_202401']"
287 "execution_count": null,
288 "id": "c5d7d2d6-cbae-402c-8585-a48ceba8f431",
292 "d['RAWS']['fm'].shape"
297 "execution_count": null,
298 "id": "ceb44038-b166-4827-a9ab-f62f51971c39",
302 "d['RAWS']['time_raws'].shape"
307 "execution_count": null,
308 "id": "481bb0b6-b5de-4244-89bc-24b6073e5cd4",
312 "d['HRRR']['f01']['Ed'].shape"
317 "execution_count": null,
318 "id": "88d3d4b6-c482-4158-ae13-daf729c2a496",
322 "d['HRRR']['time'].shape"
327 "execution_count": null,
328 "id": "d834581e-eb4a-494f-894a-5c81ff92bf85",
332 "from utils import str2time\n",
333 "d = dat['NV040_202401']\n",
334 "time_raws=str2time(d['RAWS']['time_raws']) \n",
335 "time_hrrr=str2time(d['HRRR']['time'])\n",
337 "t1 = np.array([t.timestamp() for t in time_raws])\n",
338 "t2 = np.array([t.timestamp() for t in time_hrrr])"
343 "execution_count": null,
344 "id": "266ac8f6-119b-4424-b162-9077ab76b8e4",
348 "tnew = np.interp(t2, t1, t1)"
353 "execution_count": null,
354 "id": "1950759a-a332-4054-aee1-b086fbf4c39c",
358 "np.isin(tnew, t1).mean()"
363 "execution_count": null,
364 "id": "8b93c8ae-cfbb-4344-beda-5d3f60ed8ed9",
371 "execution_count": null,
372 "id": "1c9a2263-9ec2-4c7e-9c79-90f8209a4c87",
379 "execution_count": null,
380 "id": "9e2a78e5-330a-49ce-9dc9-babcb580d0d4",
384 "d = dat['LIB03_202401']"
389 "execution_count": null,
390 "id": "08b4c474-4332-4b45-9110-d8a21432cdf9",
394 "d['RAWS']['fm'].shape"
399 "execution_count": null,
400 "id": "1c12c514-37db-4b13-a35d-bd28651c00b4",
404 "d['RAWS']['time_raws'].shape"
409 "execution_count": null,
410 "id": "9846b2ae-38a8-4f0b-96a6-1c21d0ecb616",
414 "d['HRRR']['f01']['Ed'].shape"
419 "execution_count": null,
420 "id": "614fe317-8233-4dbf-97c8-c2d8d8923fea",
424 "d['HRRR']['time'].shape"
429 "execution_count": null,
430 "id": "40be5a62-06ec-4623-9684-bf186c3bcd1d",
437 "execution_count": null,
438 "id": "081e0202-2bc0-4bd8-b4ab-f5eec289b70c",
445 "execution_count": null,
446 "id": "31a1d814-9028-4213-ad9d-fabf1e35b554",
453 "execution_count": null,
454 "id": "42de9659-3725-4f92-b17c-455126e89c47",
461 "execution_count": null,
462 "id": "37cb45e7-b46b-429b-a9be-6c3a35677a28",
469 "execution_count": null,
470 "id": "3f007941-6682-4022-9b78-48c214152635",
477 "execution_count": null,
478 "id": "20687996-8267-4e93-9e24-b9515259e256",
485 "execution_count": null,
486 "id": "55ce3b0e-7135-43ff-86ab-379c50b349db",
490 "time_raws = str2time(dat['NV040_202401']['RAWS']['time_raws'])\n",
491 "time_hrrr = str2time(dat['NV040_202401']['HRRR']['time'])\n",
492 "fm = dat['NV040_202401']['RAWS']['fm']\n",
493 "rain = dat['NV040_202401']['HRRR']['f01']['rain']"
498 "execution_count": null,
499 "id": "cc4fd0b0-286b-4cd9-88c4-59e996348235",
503 "len(time_raws) == len(fm)"
508 "execution_count": null,
509 "id": "1680ed39-a021-4631-a764-40f354cf6a09",
518 "execution_count": null,
519 "id": "8a3c405a-f976-4756-aaec-63998d58203d",
528 "execution_count": null,
529 "id": "5f49d9d4-2edf-434e-8812-c8c6ef3fbb2f",
533 "filter_wind(dat['NV040_202401']['HRRR']['f01']['wind'])"
538 "execution_count": null,
539 "id": "3d6a8fe7-1864-475a-a4f3-0fe04806ad50",
543 "dat['NV040_202401']['RAWS']['wind']"
548 "execution_count": null,
549 "id": "25ad1ce4-2060-4ece-932a-396813e56dc6",
553 "dat['CNFC1_202401']['RAWS'].keys()"
558 "execution_count": null,
559 "id": "a58ca663-074d-4eb4-a644-c9e05a415e65",
563 "dat['CNFC1_202401']['RAWS']['solar']"
568 "execution_count": null,
569 "id": "c2f27bfb-73fc-4bc6-9abc-ac1f5ab46acd",
573 "dat['CNFC1_202401']['HRRR']['f01']['wind']"
578 "execution_count": null,
579 "id": "74b98be4-55b1-4db2-81de-5f650aa00bb2",
583 "dat['CNFC1_202401']['HRRR']['f01']['soilm']"
588 "execution_count": null,
589 "id": "915f9534-ff84-4844-bde4-dce567d222c5",
596 "execution_count": null,
597 "id": "cd2f357d-c813-4aa0-b3f7-cdc72f1f660c",
602 " print(\"~\"*50)\n",
604 " print(f\"HRRR Shape: {dat[k]['HRRR']['f01']['Ew'].shape}\")\n",
605 " print(f\"RAWS Shape: {dat[k]['RAWS']['fm'].shape}\")"
610 "execution_count": null,
611 "id": "1ab4cb8b-1aa8-4541-a5cc-041c493d8894",
615 "dat['LIB03_202401']['RAWS']['soil_moisture']"
620 "execution_count": null,
621 "id": "f62a897e-eeb8-4951-b64f-8b6ee11bc92a",
625 "dat['LIB03_202401']['HRRR']['f01']['soilm']"
630 "execution_count": null,
631 "id": "5037b7f3-a749-4b27-9e4c-f6c8d4fb8b6d",
636 " print(\"~\"*50)\n",
639 " fm, filter_vec = filter_fmc(dat[k][\"RAWS\"]['fm'])\n",
640 " print(f\"Percent FMC Observations Filtered: {np.mean(filter_vec)}\")\n",
641 " if 'rain' in dat[k][\"RAWS\"].keys():\n",
642 " rain, filter_vec = filter_rain(dat[k][\"RAWS\"]['rain'])\n",
643 " print(f\"Percent Rain Observations Filtered: {np.mean(filter_vec)}\")\n",
644 " if 'wind' in dat[k][\"RAWS\"].keys():\n",
645 " wind, filter_vec = filter_wind(dat[k][\"RAWS\"]['wind'])\n",
646 " print(f\"Percent Wind Observations Filtered: {np.mean(filter_vec)}\")\n",
647 " # Filter HRRR unnecessary? "
652 "execution_count": null,
653 "id": "abd5985c-c727-44fb-a1d6-b212dc24d3ac",
660 "execution_count": null,
661 "id": "3e6849f7-e983-446e-9d1b-26a1b4c693fd",
668 "execution_count": null,
669 "id": "3b307ae1-980d-487a-aad4-f2fc0a87cbcf",
676 "execution_count": null,
677 "id": "6d7b7335-589e-49bd-88bd-20b7b2a40611",
684 "execution_count": null,
685 "id": "982de3ce-e883-4eab-88dd-41295da16e7f",
692 "execution_count": null,
693 "id": "940e36a7-1597-4bac-8a82-3ecb602e124c",
700 "execution_count": null,
701 "id": "b34546f9-5c28-4c29-9148-0473fa9e00c1",
708 "execution_count": null,
709 "id": "81a421c8-ed47-40f2-83c2-d258bce764f4",
716 "execution_count": null,
717 "id": "897f84d1-4ac8-478a-82f1-ed59052b0199",
723 " dat = format_fmda_data()\n",
724 " dat = filter_data(dat)\n",
730 "execution_count": null,
731 "id": "ed4112af-2b76-4b57-8d2e-1e5e0cba3b42",
735 "def format_fmda_data(d):\n",
737 " # d: (dict) fmda dictionary, output of process in wrfxpy\n",
744 "execution_count": null,
745 "id": "b0ebb758-d1ec-477f-b6af-207dbe2a01d1",
752 "execution_count": null,
753 "id": "e4d704f9-cb10-4b3a-a65d-cf1a26ddb9d4",
760 "execution_count": null,
761 "id": "c5972b2b-eab2-4705-8401-6aa746203ac1",
768 "execution_count": null,
769 "id": "0818c399-be45-4520-9b9e-d847a73ea1bf",
776 "execution_count": null,
777 "id": "94ad78d5-47a0-4c9e-abe8-9f3cfc67208f",
785 "display_name": "Python 3 (ipykernel)",
786 "language": "python",
794 "file_extension": ".py",
795 "mimetype": "text/x-python",
797 "nbconvert_exporter": "python",
798 "pygments_lexer": "ipython3",