5 "execution_count": null,
6 "id": "9ddd1d89-abdb-4627-a0ca-23db006b62f4",
12 "import os.path as osp\n",
13 "import subprocess\n",
14 "from urllib.parse import urlparse\n",
15 "import numpy as np\n",
16 "import matplotlib.pyplot as plt\n",
17 "from utils import time_intp, str2time, filter_nan_values, read_pkl, read_yml"
22 "execution_count": null,
23 "id": "3c141ad1-b997-485f-a4a7-d0c1ee79eb09",
29 "cell_type": "markdown",
30 "id": "609ea544-ed92-40a6-892b-1943e9f6f620",
38 "execution_count": null,
39 "id": "8ef14e43-030f-422e-a3a6-e9802a3df3ac",
43 "def retrieve_url(url, dest_path, force_download=False):\n",
44 " if not osp.exists(dest_path) or force_download:\n",
45 " target_extension = osp.splitext(dest_path)[1]\n",
46 " url_extension = osp.splitext(urlparse(url).path)[1]\n",
47 " if target_extension != url_extension:\n",
48 " print(\"Warning: file extension from url does not match destination file extension\")\n",
49 " subprocess.call(f\"wget -O {dest_path} {url}\", shell=True)\n",
50 " assert osp.exists(dest_path)\n",
51 " print(f\"Successfully downloaded {url} to {dest_path}\")\n",
53 " print(f\"Target data already exists at {dest_path}\")"
58 "execution_count": null,
59 "id": "41b0d403-7d6b-44f4-963f-8dc492ae0126",
63 "retrieve_url(\"https://demo.openwfm.org/web/data/fmda/dicts/fmda_nw_202401-05_f05.pkl\", \"data/fmda_nw_202401-05_f05.pkl\")"
68 "execution_count": null,
69 "id": "5a2972e2-e360-43b8-9706-01aee586096c",
76 "execution_count": null,
77 "id": "e69e37b9-73ef-45a1-9738-844f26dc3323",
81 "data_params = read_yml(\"params_data.yaml\")\n",
87 "execution_count": null,
88 "id": "6b5c3c82-84ba-426c-b8d9-f540b5026158",
92 "dat = read_pkl(\"data/test_CA_202401.pkl\")"
97 "execution_count": null,
98 "id": "5b2aeecb-89e6-41d0-af88-59a07b929edc",
102 "dat['NV040_202401']['RAWS']['fm']"
106 "cell_type": "markdown",
107 "id": "dae0e47b-02eb-4759-9b95-3cc1b281d41e",
115 "execution_count": null,
116 "id": "8d6fbd18-adad-4370-add7-164275fc010d",
120 "def filter_fmc(x, data_params=data_params):\n",
121 " # Vector to track which values of input x are set to na\n",
122 " na_vec = np.zeros_like(x)\n",
124 " # Filter extreme lows\n",
125 " condition = x<= data_params['min_fm']\n",
126 " x[condition] = np.nan\n",
127 " na_vec[np.where(condition)] = 1\n",
129 " # Filter extreme highs\n",
130 " condition = x>= data_params['max_fm']\n",
131 " x[condition] = np.nan\n",
132 " na_vec[condition] = 1\n",
139 "execution_count": null,
140 "id": "e49e7951-228c-44ee-ae54-d941e9a350f2",
144 "def filter_rain(r, data_params=data_params):\n",
145 " # Vector to track which values of input r are set to na\n",
146 " na_vec = np.zeros_like(r)\n",
148 " # Filter extreme lows\n",
149 " condition = r< data_params['min_rain']\n",
150 " r[condition] = np.nan\n",
151 " na_vec[np.where(condition)] = 1\n",
153 " # Filter extreme highs\n",
154 " condition = r>= data_params['max_rain']\n",
155 " r[condition] = np.nan\n",
156 " na_vec[condition] = 1 \n",
163 "execution_count": null,
164 "id": "957c0a4f-874c-4cf7-ad73-45053c2fdecc",
168 "def filter_wind(w, data_params=data_params):\n",
169 " # Vector to track which values of input w are set to na\n",
170 " na_vec = np.zeros_like(w)\n",
172 " # Filter extreme lows\n",
173 " condition = w< data_params['min_wind']\n",
174 " w[condition] = np.nan\n",
175 " na_vec[np.where(condition)] = 1\n",
177 " # Filter extreme highs\n",
178 " condition = w>= data_params['max_wind']\n",
179 " w[condition] = np.nan\n",
180 " na_vec[condition] = 1 \n",
187 "execution_count": null,
188 "id": "7b6b4347-6abe-4c21-8318-06a766d67d21",
193 " # NV040_202401: more raws observations than HRRR, interp should shorten\n",
194 " # NV026_202401: raws 10min obs, interp should shorten\n",
195 " # CGVC1_202401: missing only a few observations, interp should lengthen\n",
196 " # YNWC1_202401: only 2 observations, should be filtered entirely"
201 "execution_count": null,
202 "id": "74d3b25a-6803-41c5-855b-92ed980d3fcb",
206 "def time_intp(t1, v1, t2):\n",
207 " # Check if t1 v1 t2 are 1D arrays\n",
208 " if t1.ndim != 1:\n",
209 " logging.error(\"Error: t1 is not a 1D array. Dimension: %s\", t1.ndim)\n",
211 " if v1.ndim != 1:\n",
212 " logging.error(\"Error: v1 is not a 1D array. Dimension: %s\", v1.ndim)\n",
214 " if t2.ndim != 1:\n",
215 " logging.error(\"Error: t2 is not a 1D array. Dimension: %s\", t2.ndim)\n",
217 " # Check if t1 and v1 have the same length\n",
218 " if len(t1) != len(v1):\n",
219 " logging.error(\"Error: t1 and v1 have different lengths: %s %s\",len(t1),len(v1))\n",
221 " t1_no_nan, v1_no_nan = filter_nan_values(t1, v1)\n",
222 " # print('t1_no_nan.dtype=',t1_no_nan.dtype)\n",
223 " # Convert datetime objects to timestamps\n",
224 " t1_stamps = np.array([t.timestamp() for t in t1_no_nan])\n",
225 " t2_stamps = np.array([t.timestamp() for t in t2])\n",
231 "execution_count": null,
232 "id": "2e1a9438-3bfa-4c6d-8f4e-2888409ecb75",
241 "execution_count": null,
242 "id": "f7e77394-dea3-4b24-82ed-6ce3c7dab21e",
246 "def filter_nan_cases(d, data_params=data_params):\n",
247 " # Remove cases with too many NAN relative to interp target. Use to avoid over-interpolating\n",
248 " ks = [] # accumulate keys to remove\n",
250 " n_fm = dat[k]['RAWS']['fm'].shape[0]\n",
251 " n_obs = dat[k]['HRRR']['time'].shape[0]\n",
252 " pct_na = (1 - n_fm / n_obs)\n",
253 " if pct_na > data_params['max_pct_na']:\n",
254 " print(f\"Removing key {k} due to extensive missing data. Percent NA relative to HRRR: {np.round(pct_na, 3)}\")\n",
256 " for k in ks: d.pop(k, None)"
261 "execution_count": null,
262 "id": "faffc4ea-1b96-4ab4-af44-e2555eba39e1",
266 "filter_nan_cases(dat)"
271 "execution_count": null,
272 "id": "ac4c695e-bcc6-4e53-b934-f3837dac70ca",
281 "execution_count": null,
282 "id": "c3fb7817-82b4-49ee-8c62-ef7308648c02",
289 "execution_count": null,
290 "id": "a9f0b53b-b291-4592-ab0a-d05a6d2f770c",
297 "execution_count": null,
298 "id": "9b7af652-5836-480f-861c-09bacaf035ea",
302 "d = dat['NV026_202401']"
307 "execution_count": null,
308 "id": "c5d7d2d6-cbae-402c-8585-a48ceba8f431",
312 "d['RAWS']['fm'].shape"
317 "execution_count": null,
318 "id": "ceb44038-b166-4827-a9ab-f62f51971c39",
322 "d['RAWS']['time_raws'].shape"
327 "execution_count": null,
328 "id": "481bb0b6-b5de-4244-89bc-24b6073e5cd4",
332 "d['HRRR']['f01']['Ed'].shape"
337 "execution_count": null,
338 "id": "88d3d4b6-c482-4158-ae13-daf729c2a496",
342 "d['HRRR']['time'].shape"
347 "execution_count": null,
348 "id": "d834581e-eb4a-494f-894a-5c81ff92bf85",
352 "from utils import str2time\n",
353 "d = dat['NV040_202401']\n",
354 "time_raws=str2time(d['RAWS']['time_raws']) \n",
355 "time_hrrr=str2time(d['HRRR']['time'])\n",
357 "t1 = np.array([t.timestamp() for t in time_raws])\n",
358 "t2 = np.array([t.timestamp() for t in time_hrrr])"
363 "execution_count": null,
364 "id": "266ac8f6-119b-4424-b162-9077ab76b8e4",
368 "tnew = np.interp(t2, t1, t1)"
373 "execution_count": null,
374 "id": "1950759a-a332-4054-aee1-b086fbf4c39c",
378 "np.isin(tnew, t1).mean()"
383 "execution_count": null,
384 "id": "8b93c8ae-cfbb-4344-beda-5d3f60ed8ed9",
391 "execution_count": null,
392 "id": "1c9a2263-9ec2-4c7e-9c79-90f8209a4c87",
399 "execution_count": null,
400 "id": "9e2a78e5-330a-49ce-9dc9-babcb580d0d4",
404 "d = dat['LIB03_202401']"
409 "execution_count": null,
410 "id": "08b4c474-4332-4b45-9110-d8a21432cdf9",
414 "d['RAWS']['fm'].shape"
419 "execution_count": null,
420 "id": "1c12c514-37db-4b13-a35d-bd28651c00b4",
424 "d['RAWS']['time_raws'].shape"
429 "execution_count": null,
430 "id": "9846b2ae-38a8-4f0b-96a6-1c21d0ecb616",
434 "d['HRRR']['f01']['Ed'].shape"
439 "execution_count": null,
440 "id": "614fe317-8233-4dbf-97c8-c2d8d8923fea",
444 "d['HRRR']['time'].shape"
449 "execution_count": null,
450 "id": "40be5a62-06ec-4623-9684-bf186c3bcd1d",
457 "execution_count": null,
458 "id": "081e0202-2bc0-4bd8-b4ab-f5eec289b70c",
465 "execution_count": null,
466 "id": "31a1d814-9028-4213-ad9d-fabf1e35b554",
473 "execution_count": null,
474 "id": "42de9659-3725-4f92-b17c-455126e89c47",
481 "execution_count": null,
482 "id": "37cb45e7-b46b-429b-a9be-6c3a35677a28",
489 "execution_count": null,
490 "id": "3f007941-6682-4022-9b78-48c214152635",
497 "execution_count": null,
498 "id": "20687996-8267-4e93-9e24-b9515259e256",
505 "execution_count": null,
506 "id": "55ce3b0e-7135-43ff-86ab-379c50b349db",
510 "time_raws = str2time(dat['NV040_202401']['RAWS']['time_raws'])\n",
511 "time_hrrr = str2time(dat['NV040_202401']['HRRR']['time'])\n",
512 "fm = dat['NV040_202401']['RAWS']['fm']\n",
513 "rain = dat['NV040_202401']['HRRR']['f01']['rain']"
518 "execution_count": null,
519 "id": "cc4fd0b0-286b-4cd9-88c4-59e996348235",
523 "len(time_raws) == len(fm)"
528 "execution_count": null,
529 "id": "1680ed39-a021-4631-a764-40f354cf6a09",
538 "execution_count": null,
539 "id": "8a3c405a-f976-4756-aaec-63998d58203d",
548 "execution_count": null,
549 "id": "5f49d9d4-2edf-434e-8812-c8c6ef3fbb2f",
553 "filter_wind(dat['NV040_202401']['HRRR']['f01']['wind'])"
558 "execution_count": null,
559 "id": "3d6a8fe7-1864-475a-a4f3-0fe04806ad50",
563 "dat['NV040_202401']['RAWS']['wind']"
568 "execution_count": null,
569 "id": "25ad1ce4-2060-4ece-932a-396813e56dc6",
573 "dat['CNFC1_202401']['RAWS'].keys()"
578 "execution_count": null,
579 "id": "a58ca663-074d-4eb4-a644-c9e05a415e65",
583 "dat['CNFC1_202401']['RAWS']['solar']"
588 "execution_count": null,
589 "id": "c2f27bfb-73fc-4bc6-9abc-ac1f5ab46acd",
593 "dat['CNFC1_202401']['HRRR']['f01']['wind']"
598 "execution_count": null,
599 "id": "74b98be4-55b1-4db2-81de-5f650aa00bb2",
603 "dat['CNFC1_202401']['HRRR']['f01']['soilm']"
608 "execution_count": null,
609 "id": "915f9534-ff84-4844-bde4-dce567d222c5",
616 "execution_count": null,
617 "id": "cd2f357d-c813-4aa0-b3f7-cdc72f1f660c",
622 " print(\"~\"*50)\n",
624 " print(f\"HRRR Shape: {dat[k]['HRRR']['f01']['Ew'].shape}\")\n",
625 " print(f\"RAWS Shape: {dat[k]['RAWS']['fm'].shape}\")"
630 "execution_count": null,
631 "id": "1ab4cb8b-1aa8-4541-a5cc-041c493d8894",
635 "dat['LIB03_202401']['RAWS']['soil_moisture']"
640 "execution_count": null,
641 "id": "f62a897e-eeb8-4951-b64f-8b6ee11bc92a",
645 "dat['LIB03_202401']['HRRR']['f01']['soilm']"
650 "execution_count": null,
651 "id": "5037b7f3-a749-4b27-9e4c-f6c8d4fb8b6d",
656 " print(\"~\"*50)\n",
659 " fm, filter_vec = filter_fmc(dat[k][\"RAWS\"]['fm'])\n",
660 " print(f\"Percent FMC Observations Filtered: {np.mean(filter_vec)}\")\n",
661 " if 'rain' in dat[k][\"RAWS\"].keys():\n",
662 " rain, filter_vec = filter_rain(dat[k][\"RAWS\"]['rain'])\n",
663 " print(f\"Percent Rain Observations Filtered: {np.mean(filter_vec)}\")\n",
664 " if 'wind' in dat[k][\"RAWS\"].keys():\n",
665 " wind, filter_vec = filter_wind(dat[k][\"RAWS\"]['wind'])\n",
666 " print(f\"Percent Wind Observations Filtered: {np.mean(filter_vec)}\")\n",
667 " # Filter HRRR unnecessary? "
672 "execution_count": null,
673 "id": "abd5985c-c727-44fb-a1d6-b212dc24d3ac",
680 "execution_count": null,
681 "id": "3e6849f7-e983-446e-9d1b-26a1b4c693fd",
688 "execution_count": null,
689 "id": "3b307ae1-980d-487a-aad4-f2fc0a87cbcf",
696 "execution_count": null,
697 "id": "6d7b7335-589e-49bd-88bd-20b7b2a40611",
704 "execution_count": null,
705 "id": "982de3ce-e883-4eab-88dd-41295da16e7f",
712 "execution_count": null,
713 "id": "940e36a7-1597-4bac-8a82-3ecb602e124c",
720 "execution_count": null,
721 "id": "b34546f9-5c28-4c29-9148-0473fa9e00c1",
728 "execution_count": null,
729 "id": "81a421c8-ed47-40f2-83c2-d258bce764f4",
736 "execution_count": null,
737 "id": "897f84d1-4ac8-478a-82f1-ed59052b0199",
743 " dat = format_fmda_data()\n",
744 " dat = filter_data(dat)\n",
750 "execution_count": null,
751 "id": "ed4112af-2b76-4b57-8d2e-1e5e0cba3b42",
755 "def format_fmda_data(d):\n",
757 " # d: (dict) fmda dictionary, output of process in wrfxpy\n",
764 "execution_count": null,
765 "id": "b0ebb758-d1ec-477f-b6af-207dbe2a01d1",
772 "execution_count": null,
773 "id": "e4d704f9-cb10-4b3a-a65d-cf1a26ddb9d4",
780 "execution_count": null,
781 "id": "c5972b2b-eab2-4705-8401-6aa746203ac1",
788 "execution_count": null,
789 "id": "0818c399-be45-4520-9b9e-d847a73ea1bf",
796 "execution_count": null,
797 "id": "94ad78d5-47a0-4c9e-abe8-9f3cfc67208f",
805 "display_name": "Python 3 (ipykernel)",
806 "language": "python",
814 "file_extension": ".py",
815 "mimetype": "text/x-python",
817 "nbconvert_exporter": "python",
818 "pygments_lexer": "ipython3",