5 "execution_count": null,
6 "id": "9ddd1d89-abdb-4627-a0ca-23db006b62f4",
12 "import os.path as osp\n",
13 "import subprocess\n",
14 "from urllib.parse import urlparse\n",
15 "import numpy as np\n",
16 "import matplotlib.pyplot as plt\n",
17 "from utils import time_intp, str2time, filter_nan_values, read_pkl, read_yml, retrieve_url"
21 "cell_type": "markdown",
22 "id": "609ea544-ed92-40a6-892b-1943e9f6f620",
30 "execution_count": null,
31 "id": "41b0d403-7d6b-44f4-963f-8dc492ae0126",
35 "retrieve_url(\"https://demo.openwfm.org/web/data/fmda/dicts/fmda_nw_202401-05_f05.pkl\", \"data/fmda_nw_202401-05_f05.pkl\")"
40 "execution_count": null,
41 "id": "e69e37b9-73ef-45a1-9738-844f26dc3323",
45 "data_params = read_yml(\"params_data.yaml\")\n",
51 "execution_count": null,
52 "id": "6b5c3c82-84ba-426c-b8d9-f540b5026158",
56 "# dat = read_pkl(\"data/test_CA_202401.pkl\")\n",
57 "dat = read_pkl(\"data/test_NW_202401.pkl\")"
61 "cell_type": "markdown",
62 "id": "dae0e47b-02eb-4759-9b95-3cc1b281d41e",
70 "execution_count": null,
71 "id": "7b6b4347-6abe-4c21-8318-06a766d67d21",
76 " # NV040_202401: more RAWS observations than HRRR, interpolation should shorten\n",
77 " # NV026_202401: RAWS 10-minute observations, interpolation should shorten\n",
78 " # CGVC1_202401: missing only a few observations, interpolation should lengthen\n",
79 " # YNWC1_202401: only 2 observations, should be filtered entirely"
84 "execution_count": null,
85 "id": "fc3fbda3-5e93-4122-9278-4b95ec69d25f",
89 "def flag_lag_stretches(x, lag = 1, threshold = data_params['zero_lag_threshold']):\n",
90 " lags = np.diff(x, n=lag)\n",
91 " zero_lag_indices = np.where(lags == 0)[0]\n",
92 " current_run_length = 1\n",
93 " for i in range(1, len(zero_lag_indices)):\n",
94 " if zero_lag_indices[i] == zero_lag_indices[i-1] + 1:\n",
95 " current_run_length += 1\n",
96 " if current_run_length > threshold:\n",
99 " current_run_length = 1\n",
106 "execution_count": null,
107 "id": "67689bfe-3971-495f-95ef-0d52f3c7c3b5",
111 "cases = list(dat.keys())\n",
112 "flags = np.zeros(len(cases))\n",
113 "for i, case in enumerate(cases):\n",
114 " print(\"~\"*50)\n",
115 " print(f\"Case: {case}\")\n",
116 " time_raws=str2time(dat[case]['RAWS']['time_raws'])\n",
117 " time_hrrr=str2time(dat[case][\"HRRR\"]['time'])\n",
118 " fm = dat[case]['RAWS']['fm']\n",
119 " ynew = time_intp(time_raws,fm,time_hrrr)\n",
120 " dat[case]['y'] = ynew\n",
121 " if flag_lag_stretches(ynew):\n",
122 " print(f\"Flagging case {case} for zero lag stretches greater than `zero_lag_threshold` param {data_params['zero_lag_threshold']}\")\n",
124 " if flag_lag_stretches(ynew, lag=2, threshold=data_params['max_intp_time']):\n",
125 " print(f\"Flagging case {case} for constant linear stretches greater than `max_intp_time` param {data_params['max_intp_time']}\")\n",
127 " if np.any(ynew>=data_params['max_fm']) or np.any(ynew<=data_params['min_fm']):\n",
128 " print(f\"Flagging case {case} for FMC outside param range {data_params['min_fm'],data_params['max_fm']}. FMC range for {case}: {ynew.min(),ynew.max()}\")\n",
134 "execution_count": null,
135 "id": "246272bf-2f2e-4bab-97e2-b9d7f946618a",
139 "flagged_cases = [name for name, is_flagged in zip(cases, flags) if is_flagged == 1]\n",
140 "print(flagged_cases)"
145 "execution_count": null,
146 "id": "bc28bd0a-1673-4414-bbc6-31baf55618ae",
153 "execution_count": null,
154 "id": "1f97877c-89c9-49c1-a141-dac7ee2ea1a1",
161 "execution_count": null,
162 "id": "04a22d48-2ef1-46b4-ab2b-333b240c799f",
169 "execution_count": null,
170 "id": "16f30816-3f94-4238-a0f2-da69632415ba",
178 "display_name": "Python 3 (ipykernel)",
179 "language": "python",
187 "file_extension": ".py",
188 "mimetype": "text/x-python",
190 "nbconvert_exporter": "python",
191 "pygments_lexer": "ipython3",