5 "execution_count": null,
6 "id": "9ddd1d89-abdb-4627-a0ca-23db006b62f4",
12 "import os.path as osp\n",
13 "import subprocess\n",
14 "from urllib.parse import urlparse\n",
15 "import numpy as np\n",
16 "import matplotlib.pyplot as plt\n",
17 "from utils import time_intp, str2time, filter_nan_values"
22 "execution_count": null,
23 "id": "3c141ad1-b997-485f-a4a7-d0c1ee79eb09",
29 "cell_type": "markdown",
30 "id": "609ea544-ed92-40a6-892b-1943e9f6f620",
38 "execution_count": null,
39 "id": "8ef14e43-030f-422e-a3a6-e9802a3df3ac",
43 "def retrieve_url(url, dest_path, force_download=False):\n",
44 " if not osp.exists(dest_path) or force_download:\n",
45 " target_extension = osp.splitext(dest_path)[1]\n",
46 " url_extension = osp.splitext(urlparse(url).path)[1]\n",
47 " if target_extension != url_extension:\n",
48 " print(\"Warning: file extension from url does not match destination file extension\")\n",
49 " subprocess.call(f\"wget -O {dest_path} {url}\", shell=True)\n",
50 " assert osp.exists(dest_path)\n",
51 " print(f\"Successfully downloaded {url} to {dest_path}\")\n",
53 " print(f\"Target data already exists at {dest_path}\")"
58 "execution_count": null,
59 "id": "41b0d403-7d6b-44f4-963f-8dc492ae0126",
63 "retrieve_url(\"https://demo.openwfm.org/web/data/fmda/dicts/fmda_nw_202401-05_f05.pkl\", \"data/fmda_nw_202401-05_f05.pkl\")"
68 "execution_count": null,
69 "id": "5a2972e2-e360-43b8-9706-01aee586096c",
73 "def read_pkl(file_path):\n",
74 " with open(file_path, 'rb') as file:\n",
75 " print(f\"loading file {file_path}\")\n",
76 " d = pickle.load(file)\n",
82 "execution_count": null,
83 "id": "14f03aa9-a39c-4547-8c8c-52e30acff136",
87 "def read_yml(yaml_path, subkey=None):\n",
88 " with open(yaml_path, 'r') as file:\n",
89 " d = yaml.safe_load(file)\n",
90 " if subkey is not None:\n",
97 "execution_count": null,
98 "id": "e69e37b9-73ef-45a1-9738-844f26dc3323",
102 "data_params = read_yml(\"params_data.yaml\")\n",
108 "execution_count": null,
109 "id": "6b5c3c82-84ba-426c-b8d9-f540b5026158",
113 "dat = read_pkl(\"data/test_CA_202401.pkl\")"
118 "execution_count": null,
119 "id": "5b2aeecb-89e6-41d0-af88-59a07b929edc",
123 "dat['NV040_202401']['RAWS']['fm']"
127 "cell_type": "markdown",
128 "id": "dae0e47b-02eb-4759-9b95-3cc1b281d41e",
136 "execution_count": null,
137 "id": "8d6fbd18-adad-4370-add7-164275fc010d",
141 "def filter_fmc(x, data_params=data_params):\n",
142 " # Vector to track which values of input x are set to na\n",
143 " na_vec = np.zeros_like(x)\n",
145 " # Filter extreme lows\n",
146 " condition = x<= data_params['min_fm']\n",
147 " x[condition] = np.nan\n",
148 " na_vec[np.where(condition)] = 1\n",
150 " # Filter extreme highs\n",
151 " condition = x>= data_params['max_fm']\n",
152 " x[condition] = np.nan\n",
153 " na_vec[condition] = 1\n",
160 "execution_count": null,
161 "id": "e49e7951-228c-44ee-ae54-d941e9a350f2",
165 "def filter_rain(r, data_params=data_params):\n",
166 " # Vector to track which values of input r are set to na\n",
167 " na_vec = np.zeros_like(r)\n",
169 " # Filter extreme lows\n",
170 " condition = r< data_params['min_rain']\n",
171 " r[condition] = np.nan\n",
172 " na_vec[np.where(condition)] = 1\n",
174 " # Filter extreme highs\n",
175 " condition = r>= data_params['max_rain']\n",
176 " r[condition] = np.nan\n",
177 " na_vec[condition] = 1 \n",
184 "execution_count": null,
185 "id": "957c0a4f-874c-4cf7-ad73-45053c2fdecc",
189 "def filter_wind(w, data_params=data_params):\n",
190 " # Vector to track which values of input w are set to na\n",
191 " na_vec = np.zeros_like(w)\n",
193 " # Filter extreme lows\n",
194 " condition = w< data_params['min_wind']\n",
195 " w[condition] = np.nan\n",
196 " na_vec[np.where(condition)] = 1\n",
198 " # Filter extreme highs\n",
199 " condition = w>= data_params['max_wind']\n",
200 " w[condition] = np.nan\n",
201 " na_vec[condition] = 1 \n",
208 "execution_count": null,
209 "id": "7b6b4347-6abe-4c21-8318-06a766d67d21",
214 " # NV040_202401: more raws observations than HRRR, interp should shorten\n",
215 " # NV026_202401: raws 10min obs, interp should shorten\n",
216 " # CGVC1_202401: missing only a few observations, interp should lengthen\n",
217 " # YNWC1_202401: only 2 observations, should be filtered entirely"
222 "execution_count": null,
223 "id": "74d3b25a-6803-41c5-855b-92ed980d3fcb",
227 "def time_intp(t1, v1, t2):\n",
228 " # Check if t1 v1 t2 are 1D arrays\n",
229 " if t1.ndim != 1:\n",
230 " logging.error(\"Error: t1 is not a 1D array. Dimension: %s\", t1.ndim)\n",
232 " if v1.ndim != 1:\n",
233 " logging.error(\"Error: v1 is not a 1D array. Dimension: %s\", v1.ndim)\n",
235 " if t2.ndim != 1:\n",
236 " logging.error(\"Error: t2 is not a 1D array. Dimension: %s\", t2.ndim)\n",
238 " # Check if t1 and v1 have the same length\n",
239 " if len(t1) != len(v1):\n",
240 " logging.error(\"Error: t1 and v1 have different lengths: %s %s\",len(t1),len(v1))\n",
242 " t1_no_nan, v1_no_nan = filter_nan_values(t1, v1)\n",
243 " # print('t1_no_nan.dtype=',t1_no_nan.dtype)\n",
244 " # Convert datetime objects to timestamps\n",
245 " t1_stamps = np.array([t.timestamp() for t in t1_no_nan])\n",
246 " t2_stamps = np.array([t.timestamp() for t in t2])\n",
252 "execution_count": null,
253 "id": "2e1a9438-3bfa-4c6d-8f4e-2888409ecb75",
262 "execution_count": null,
263 "id": "f7e77394-dea3-4b24-82ed-6ce3c7dab21e",
267 "def filter_nan_cases(d, data_params=data_params):\n",
268 " # Remove cases with too many NAN relative to interp target. Use to avoid over-interpolating\n",
269 " ks = [] # accumulate keys to remove\n",
271 " n_fm = d[k]['RAWS']['fm'].shape[0]\n",
272 " n_obs = d[k]['HRRR']['time'].shape[0]\n",
273 " pct_na = (1 - n_fm / n_obs)\n",
274 " if pct_na > data_params['max_pct_na']:\n",
275 " print(f\"Removing key {k} due to extensive missing data. Percent NA relative to HRRR: {np.round(pct_na, 3)}\")\n",
277 " for k in ks: d.pop(k, None)"
282 "execution_count": null,
283 "id": "faffc4ea-1b96-4ab4-af44-e2555eba39e1",
287 "filter_nan_cases(dat)"
292 "execution_count": null,
293 "id": "ac4c695e-bcc6-4e53-b934-f3837dac70ca",
302 "execution_count": null,
303 "id": "c3fb7817-82b4-49ee-8c62-ef7308648c02",
310 "execution_count": null,
311 "id": "a9f0b53b-b291-4592-ab0a-d05a6d2f770c",
318 "execution_count": null,
319 "id": "9b7af652-5836-480f-861c-09bacaf035ea",
323 "d = dat['NV026_202401']"
328 "execution_count": null,
329 "id": "c5d7d2d6-cbae-402c-8585-a48ceba8f431",
333 "d['RAWS']['fm'].shape"
338 "execution_count": null,
339 "id": "ceb44038-b166-4827-a9ab-f62f51971c39",
343 "d['RAWS']['time_raws'].shape"
348 "execution_count": null,
349 "id": "481bb0b6-b5de-4244-89bc-24b6073e5cd4",
353 "d['HRRR']['f01']['Ed'].shape"
358 "execution_count": null,
359 "id": "88d3d4b6-c482-4158-ae13-daf729c2a496",
363 "d['HRRR']['time'].shape"
368 "execution_count": null,
369 "id": "d834581e-eb4a-494f-894a-5c81ff92bf85",
373 "from utils import str2time\n",
374 "d = dat['NV040_202401']\n",
375 "time_raws=str2time(d['RAWS']['time_raws']) \n",
376 "time_hrrr=str2time(d['HRRR']['time'])\n",
378 "t1 = np.array([t.timestamp() for t in time_raws])\n",
379 "t2 = np.array([t.timestamp() for t in time_hrrr])"
384 "execution_count": null,
385 "id": "266ac8f6-119b-4424-b162-9077ab76b8e4",
389 "tnew = np.interp(t2, t1, t1)"
394 "execution_count": null,
395 "id": "1950759a-a332-4054-aee1-b086fbf4c39c",
399 "np.isin(tnew, t1).mean()"
404 "execution_count": null,
405 "id": "8b93c8ae-cfbb-4344-beda-5d3f60ed8ed9",
412 "execution_count": null,
413 "id": "1c9a2263-9ec2-4c7e-9c79-90f8209a4c87",
420 "execution_count": null,
421 "id": "9e2a78e5-330a-49ce-9dc9-babcb580d0d4",
425 "d = dat['LIB03_202401']"
430 "execution_count": null,
431 "id": "08b4c474-4332-4b45-9110-d8a21432cdf9",
435 "d['RAWS']['fm'].shape"
440 "execution_count": null,
441 "id": "1c12c514-37db-4b13-a35d-bd28651c00b4",
445 "d['RAWS']['time_raws'].shape"
450 "execution_count": null,
451 "id": "9846b2ae-38a8-4f0b-96a6-1c21d0ecb616",
455 "d['HRRR']['f01']['Ed'].shape"
460 "execution_count": null,
461 "id": "614fe317-8233-4dbf-97c8-c2d8d8923fea",
465 "d['HRRR']['time'].shape"
470 "execution_count": null,
471 "id": "40be5a62-06ec-4623-9684-bf186c3bcd1d",
478 "execution_count": null,
479 "id": "081e0202-2bc0-4bd8-b4ab-f5eec289b70c",
486 "execution_count": null,
487 "id": "31a1d814-9028-4213-ad9d-fabf1e35b554",
494 "execution_count": null,
495 "id": "42de9659-3725-4f92-b17c-455126e89c47",
502 "execution_count": null,
503 "id": "37cb45e7-b46b-429b-a9be-6c3a35677a28",
510 "execution_count": null,
511 "id": "3f007941-6682-4022-9b78-48c214152635",
518 "execution_count": null,
519 "id": "20687996-8267-4e93-9e24-b9515259e256",
526 "execution_count": null,
527 "id": "55ce3b0e-7135-43ff-86ab-379c50b349db",
531 "time_raws = str2time(dat['NV040_202401']['RAWS']['time_raws'])\n",
532 "time_hrrr = str2time(dat['NV040_202401']['HRRR']['time'])\n",
533 "fm = dat['NV040_202401']['RAWS']['fm']\n",
534 "rain = dat['NV040_202401']['HRRR']['f01']['rain']"
539 "execution_count": null,
540 "id": "cc4fd0b0-286b-4cd9-88c4-59e996348235",
544 "len(time_raws) == len(fm)"
549 "execution_count": null,
550 "id": "1680ed39-a021-4631-a764-40f354cf6a09",
559 "execution_count": null,
560 "id": "8a3c405a-f976-4756-aaec-63998d58203d",
569 "execution_count": null,
570 "id": "5f49d9d4-2edf-434e-8812-c8c6ef3fbb2f",
574 "filter_wind(dat['NV040_202401']['HRRR']['f01']['wind'])"
579 "execution_count": null,
580 "id": "3d6a8fe7-1864-475a-a4f3-0fe04806ad50",
584 "dat['NV040_202401']['RAWS']['wind']"
589 "execution_count": null,
590 "id": "25ad1ce4-2060-4ece-932a-396813e56dc6",
594 "dat['CNFC1_202401']['RAWS'].keys()"
599 "execution_count": null,
600 "id": "a58ca663-074d-4eb4-a644-c9e05a415e65",
604 "dat['CNFC1_202401']['RAWS']['solar']"
609 "execution_count": null,
610 "id": "c2f27bfb-73fc-4bc6-9abc-ac1f5ab46acd",
614 "dat['CNFC1_202401']['HRRR']['f01']['wind']"
619 "execution_count": null,
620 "id": "74b98be4-55b1-4db2-81de-5f650aa00bb2",
624 "dat['CNFC1_202401']['HRRR']['f01']['soilm']"
629 "execution_count": null,
630 "id": "915f9534-ff84-4844-bde4-dce567d222c5",
637 "execution_count": null,
638 "id": "cd2f357d-c813-4aa0-b3f7-cdc72f1f660c",
643 " print(\"~\"*50)\n",
645 " print(f\"HRRR Shape: {dat[k]['HRRR']['f01']['Ew'].shape}\")\n",
646 " print(f\"RAWS Shape: {dat[k]['RAWS']['fm'].shape}\")"
651 "execution_count": null,
652 "id": "1ab4cb8b-1aa8-4541-a5cc-041c493d8894",
656 "dat['LIB03_202401']['RAWS']['soil_moisture']"
661 "execution_count": null,
662 "id": "f62a897e-eeb8-4951-b64f-8b6ee11bc92a",
666 "dat['LIB03_202401']['HRRR']['f01']['soilm']"
671 "execution_count": null,
672 "id": "5037b7f3-a749-4b27-9e4c-f6c8d4fb8b6d",
677 " print(\"~\"*50)\n",
680 " fm, filter_vec = filter_fmc(dat[k][\"RAWS\"]['fm'])\n",
681 " print(f\"Percent FMC Observations Filtered: {np.mean(filter_vec)}\")\n",
682 " if 'rain' in dat[k][\"RAWS\"].keys():\n",
683 " rain, filter_vec = filter_rain(dat[k][\"RAWS\"]['rain'])\n",
684 " print(f\"Percent Rain Observations Filtered: {np.mean(filter_vec)}\")\n",
685 " if 'wind' in dat[k][\"RAWS\"].keys():\n",
686 " wind, filter_vec = filter_wind(dat[k][\"RAWS\"]['wind'])\n",
687 " print(f\"Percent Wind Observations Filtered: {np.mean(filter_vec)}\")\n",
688 " # Filter HRRR unnecessary? "
693 "execution_count": null,
694 "id": "abd5985c-c727-44fb-a1d6-b212dc24d3ac",
701 "execution_count": null,
702 "id": "3e6849f7-e983-446e-9d1b-26a1b4c693fd",
709 "execution_count": null,
710 "id": "3b307ae1-980d-487a-aad4-f2fc0a87cbcf",
717 "execution_count": null,
718 "id": "6d7b7335-589e-49bd-88bd-20b7b2a40611",
725 "execution_count": null,
726 "id": "982de3ce-e883-4eab-88dd-41295da16e7f",
733 "execution_count": null,
734 "id": "940e36a7-1597-4bac-8a82-3ecb602e124c",
741 "execution_count": null,
742 "id": "b34546f9-5c28-4c29-9148-0473fa9e00c1",
749 "execution_count": null,
750 "id": "81a421c8-ed47-40f2-83c2-d258bce764f4",
757 "execution_count": null,
758 "id": "897f84d1-4ac8-478a-82f1-ed59052b0199",
764 " dat = format_fmda_data()\n",
765 " dat = filter_data(dat)\n",
771 "execution_count": null,
772 "id": "ed4112af-2b76-4b57-8d2e-1e5e0cba3b42",
776 "def format_fmda_data(d):\n",
778 " # d: (dict) fmda dictionary, output of process in wrfxpy\n",
785 "execution_count": null,
786 "id": "b0ebb758-d1ec-477f-b6af-207dbe2a01d1",
793 "execution_count": null,
794 "id": "e4d704f9-cb10-4b3a-a65d-cf1a26ddb9d4",
801 "execution_count": null,
802 "id": "c5972b2b-eab2-4705-8401-6aa746203ac1",
809 "execution_count": null,
810 "id": "0818c399-be45-4520-9b9e-d847a73ea1bf",
817 "execution_count": null,
818 "id": "94ad78d5-47a0-4c9e-abe8-9f3cfc67208f",
826 "display_name": "Python 3 (ipykernel)",
827 "language": "python",
835 "file_extension": ".py",
836 "mimetype": "text/x-python",
838 "nbconvert_exporter": "python",
839 "pygments_lexer": "ipython3",