fmda/read_and_clean_tutorial.ipynb

   1 {
   2  "cells": [
   3   {
   4    "cell_type": "code",
   5    "execution_count": null,
   6    "id": "9ddd1d89-abdb-4627-a0ca-23db006b62f4",
   7    "metadata": {},
   8    "outputs": [],
   9    "source": [
  10     "import logging\n",
  11     "import os\n",
  12     "import os.path as osp\n",
  13     "import pickle\n",
  14     "import subprocess\n",
  15     "from urllib.parse import urlparse\n",
  16     "\n",
  17     "import matplotlib.pyplot as plt\n",
  18     "import numpy as np\n",
  19     "import yaml\n",
  20     "\n",
  21     "from utils import time_intp, str2time, filter_nan_values"
  18    ]
  19   },
  20   {
  21    "cell_type": "code",
  22    "execution_count": null,
  23    "id": "3c141ad1-b997-485f-a4a7-d0c1ee79eb09",
  24    "metadata": {},
  25    "outputs": [],
  26    "source": []
  27   },
  28   {
  29    "cell_type": "markdown",
  30    "id": "609ea544-ed92-40a6-892b-1943e9f6f620",
  31    "metadata": {},
  32    "source": [
  33     "## Setup"
  34    ]
  35   },
  36   {
  37    "cell_type": "code",
  38    "execution_count": null,
  39    "id": "8ef14e43-030f-422e-a3a6-e9802a3df3ac",
  40    "metadata": {},
  41    "outputs": [],
  42    "source": [
  43     "def retrieve_url(url, dest_path, force_download=False):\n",
  44     "    \"\"\"Download `url` to `dest_path` with wget unless the file is already present.\n",
  45     "\n",
  46     "    Args:\n",
  47     "        url: Address of the remote file.\n",
  48     "        dest_path: Local path the download is written to.\n",
  49     "        force_download: If True, download even when `dest_path` exists.\n",
  50     "\n",
  51     "    Raises:\n",
  52     "        AssertionError: If wget is unavailable, exits with a non-zero status,\n",
  53     "            or `dest_path` does not exist after the download.\n",
  54     "    \"\"\"\n",
  55     "    if osp.exists(dest_path) and not force_download:\n",
  56     "        print(f\"Target data already exists at {dest_path}\")\n",
  57     "        return\n",
  58     "\n",
  59     "    target_extension = osp.splitext(dest_path)[1]\n",
  60     "    url_extension = osp.splitext(urlparse(url).path)[1]\n",
  61     "    if target_extension != url_extension:\n",
  62     "        print(\"Warning: file extension from url does not match destination file extension\")\n",
  63     "\n",
  64     "    # Pass an argument list rather than a shell string: a URL containing '&'\n",
  65     "    # or ';' and a path containing spaces reach wget verbatim instead of\n",
  66     "    # being interpreted by a shell.\n",
  67     "    try:\n",
  68     "        status = subprocess.call([\"wget\", \"-O\", dest_path, url])\n",
  69     "    except FileNotFoundError as err:\n",
  70     "        raise AssertionError(\"wget is not installed or not on PATH\") from err\n",
  71     "\n",
  72     "    if status != 0:\n",
  73     "        # wget -O creates/truncates the output file before transferring, so a\n",
  74     "        # failed download leaves an empty or partial file that the exists()\n",
  75     "        # check above would accept as valid data on the next call.\n",
  76     "        if osp.exists(dest_path):\n",
  77     "            os.remove(dest_path)\n",
  78     "        raise AssertionError(f\"wget exited with status {status} while downloading {url}\")\n",
  79     "\n",
  80     "    assert osp.exists(dest_path)\n",
  81     "    print(f\"Successfully downloaded {url} to {dest_path}\")"
  54    ]
  55   },
  56   {
  57    "cell_type": "code",
  58    "execution_count": null,
  59    "id": "41b0d403-7d6b-44f4-963f-8dc492ae0126",
  60    "metadata": {},
  61    "outputs": [],
  62    "source": [
  63     "retrieve_url(\"https://demo.openwfm.org/web/data/fmda/dicts/fmda_nw_202401-05_f05.pkl\", \"data/fmda_nw_202401-05_f05.pkl\")"
  64    ]
  65   },
  66   {
  67    "cell_type": "code",
  68    "execution_count": null,
  69    "id": "5a2972e2-e360-43b8-9706-01aee586096c",
  70    "metadata": {},
  71    "outputs": [],
  72    "source": [
  73     "def read_pkl(file_path):\n",
  74     "    \"\"\"Unpickle and return the object stored at `file_path`.\n",
  75     "\n",
  76     "    pickle can execute arbitrary code while loading, so only call this on\n",
  77     "    files that come from a trusted source.\n",
  78     "    \"\"\"\n",
  79     "    with open(file_path, 'rb') as handle:\n",
  80     "        print(f\"loading file {file_path}\")\n",
  81     "        return pickle.load(handle)"
  78    ]
  79   },
  80   {
  81    "cell_type": "code",
  82    "execution_count": null,
  83    "id": "14f03aa9-a39c-4547-8c8c-52e30acff136",
  84    "metadata": {},
  85    "outputs": [],
  86    "source": [
  87     "def read_yml(yaml_path, subkey=None):\n",
  88     "    \"\"\"Parse the YAML file at `yaml_path`.\n",
  89     "\n",
  90     "    Returns the whole parsed document, or only its `subkey` entry when\n",
  91     "    `subkey` is given.\n",
  92     "    \"\"\"\n",
  93     "    with open(yaml_path, 'r') as stream:\n",
  94     "        contents = yaml.safe_load(stream)\n",
  95     "    if subkey is None:\n",
  96     "        return contents\n",
  97     "    return contents[subkey]"
  93    ]
  94   },
  95   {
  96    "cell_type": "code",
  97    "execution_count": null,
  98    "id": "e69e37b9-73ef-45a1-9738-844f26dc3323",
  99    "metadata": {},
 100    "outputs": [],
 101    "source": [
 102     "data_params = read_yml(\"params_data.yaml\")\n",
 103     "data_params"
 104    ]
 105   },
 106   {
 107    "cell_type": "code",
 108    "execution_count": null,
 109    "id": "6b5c3c82-84ba-426c-b8d9-f540b5026158",
 110    "metadata": {},
 111    "outputs": [],
 112    "source": [
 113     "dat = read_pkl(\"data/test_CA_202401.pkl\")"
 114    ]
 115   },
 116   {
 117    "cell_type": "code",
 118    "execution_count": null,
 119    "id": "5b2aeecb-89e6-41d0-af88-59a07b929edc",
 120    "metadata": {},
 121    "outputs": [],
 122    "source": [
 123     "dat['NV040_202401']['RAWS']['fm']"
 124    ]
 125   },
 126   {
 127    "cell_type": "markdown",
 128    "id": "dae0e47b-02eb-4759-9b95-3cc1b281d41e",
 129    "metadata": {},
 130    "source": [
 131     "## Filters"
 132    ]
 133   },
 134   {
 135    "cell_type": "code",
 136    "execution_count": null,
 137    "id": "8d6fbd18-adad-4370-add7-164275fc010d",
 138    "metadata": {},
 139    "outputs": [],
 140    "source": [
 141     "def filter_fmc(x, data_params=data_params):\n",
 142     "    \"\"\"Replace out-of-range fuel moisture (fm) values in `x` with NaN, in place.\n",
 143     "\n",
 144     "    A value is rejected when it is <= data_params['min_fm'] or\n",
 145     "    >= data_params['max_fm'].\n",
 146     "\n",
 147     "    Returns:\n",
 148     "        (x, na_vec): `x` itself after the in-place edit, and an array shaped\n",
 149     "        like `x` holding 1 where a value was rejected and 0 elsewhere.\n",
 150     "    \"\"\"\n",
 151     "    na_vec = np.zeros_like(x)\n",
 152     "\n",
 153     "    # Low bound first, then high bound; each pass blanks the offending\n",
 154     "    # entries and records them in na_vec.\n",
 155     "    for bound, is_outside in (('min_fm', np.less_equal), ('max_fm', np.greater_equal)):\n",
 156     "        rejected = is_outside(x, data_params[bound])\n",
 157     "        x[rejected] = np.nan\n",
 158     "        na_vec[rejected] = 1\n",
 159     "\n",
 160     "    return x, na_vec"
 156    ]
 157   },
 158   {
 159    "cell_type": "code",
 160    "execution_count": null,
 161    "id": "e49e7951-228c-44ee-ae54-d941e9a350f2",
 162    "metadata": {},
 163    "outputs": [],
 164    "source": [
 165     "def filter_rain(r, data_params=data_params):\n",
 166     "    \"\"\"Replace out-of-range rain values in `r` with NaN, in place.\n",
 167     "\n",
 168     "    A value is rejected when it is < data_params['min_rain'] or\n",
 169     "    >= data_params['max_rain'].\n",
 170     "\n",
 171     "    Returns:\n",
 172     "        (r, na_vec): `r` itself after the in-place edit, and an array shaped\n",
 173     "        like `r` holding 1 where a value was rejected and 0 elsewhere.\n",
 174     "    \"\"\"\n",
 175     "    na_vec = np.zeros_like(r)\n",
 176     "\n",
 177     "    # Low bound first, then high bound; each pass blanks the offending\n",
 178     "    # entries and records them in na_vec.\n",
 179     "    for bound, is_outside in (('min_rain', np.less), ('max_rain', np.greater_equal)):\n",
 180     "        rejected = is_outside(r, data_params[bound])\n",
 181     "        r[rejected] = np.nan\n",
 182     "        na_vec[rejected] = 1\n",
 183     "\n",
 184     "    return r, na_vec"
 180    ]
 181   },
 182   {
 183    "cell_type": "code",
 184    "execution_count": null,
 185    "id": "957c0a4f-874c-4cf7-ad73-45053c2fdecc",
 186    "metadata": {},
 187    "outputs": [],
 188    "source": [
 189     "def filter_wind(w, data_params=data_params):\n",
 190     "    \"\"\"Replace out-of-range wind values in `w` with NaN, in place.\n",
 191     "\n",
 192     "    A value is rejected when it is < data_params['min_wind'] or\n",
 193     "    >= data_params['max_wind'].\n",
 194     "\n",
 195     "    Returns:\n",
 196     "        (w, na_vec): `w` itself after the in-place edit, and an array shaped\n",
 197     "        like `w` holding 1 where a value was rejected and 0 elsewhere.\n",
 198     "    \"\"\"\n",
 199     "    na_vec = np.zeros_like(w)\n",
 200     "\n",
 201     "    # Low bound first, then high bound; each pass blanks the offending\n",
 202     "    # entries and records them in na_vec.\n",
 203     "    for bound, is_outside in (('min_wind', np.less), ('max_wind', np.greater_equal)):\n",
 204     "        rejected = is_outside(w, data_params[bound])\n",
 205     "        w[rejected] = np.nan\n",
 206     "        na_vec[rejected] = 1\n",
 207     "\n",
 208     "    return w, na_vec"
 204    ]
 205   },
 206   {
 207    "cell_type": "code",
 208    "execution_count": null,
 209    "id": "7b6b4347-6abe-4c21-8318-06a766d67d21",
 210    "metadata": {},
 211    "outputs": [],
 212    "source": [
 213     "# Useful Cases:\n",
 214     "    # NV040_202401: more raws observations than HRRR, interp should shorten\n",
 215     "    # NV026_202401: raws 10min obs, interp should shorten\n",
 216     "    # CGVC1_202401: missing only a few observations, interp should lengthen\n",
 217     "    # YNWC1_202401: only 2 observations, should be filtered entirely"
 218    ]
 219   },
 220   {
 221    "cell_type": "code",
 222    "execution_count": null,
 223    "id": "74d3b25a-6803-41c5-855b-92ed980d3fcb",
 224    "metadata": {},
 225    "outputs": [],
 226    "source": [
 227     "def time_intp(t1, v1, t2):\n",
 228     "    \"\"\"Linearly interpolate the series (t1, v1) onto the times t2.\n",
 229     "\n",
 230     "    Note: this definition shadows the `time_intp` imported from utils in the\n",
 231     "    first cell of the notebook.\n",
 232     "\n",
 233     "    Args:\n",
 234     "        t1: 1D array of objects with a .timestamp() method (e.g. datetime),\n",
 235     "            the times belonging to v1.\n",
 236     "        v1: 1D array of values, same length as t1. (t1, v1) is passed through\n",
 237     "            filter_nan_values first -- presumably dropping NaN pairs; confirm\n",
 238     "            against utils.\n",
 239     "        t2: 1D array of objects with a .timestamp() method, the target times.\n",
 240     "\n",
 241     "    Returns:\n",
 242     "        1D array with one interpolated value per element of t2, or None when\n",
 243     "        an input fails the dimension/length checks (the reason is logged).\n",
 244     "    \"\"\"\n",
 245     "    # Validate shapes up front; every failure is logged and reported as None.\n",
 246     "    for name, arr in ((\"t1\", t1), (\"v1\", v1), (\"t2\", t2)):\n",
 247     "        if arr.ndim != 1:\n",
 248     "            logging.error(\"Error: %s is not a 1D array. Dimension: %s\", name, arr.ndim)\n",
 249     "            return None\n",
 250     "    if len(t1) != len(v1):\n",
 251     "        logging.error(\"Error: t1 and v1 have different lengths: %s %s\", len(t1), len(v1))\n",
 252     "        return None\n",
 253     "\n",
 254     "    t1_no_nan, v1_no_nan = filter_nan_values(t1, v1)\n",
 255     "\n",
 256     "    # np.interp needs numeric coordinates, so convert the time objects to\n",
 257     "    # timestamps. It also expects t1_stamps to be increasing.\n",
 258     "    t1_stamps = np.array([t.timestamp() for t in t1_no_nan])\n",
 259     "    t2_stamps = np.array([t.timestamp() for t in t2])\n",
 260     "\n",
 261     "    v2_interpolated = np.interp(t2_stamps, t1_stamps, v1_no_nan)\n",
 262     "    if np.isnan(v2_interpolated).any():\n",
 263     "        logging.error(\"time_intp: interpolated output contains NaN\")\n",
 264     "\n",
 265     "    return v2_interpolated"
 248    ]
 249   },
 250   {
 251    "cell_type": "code",
 252    "execution_count": null,
 253    "id": "2e1a9438-3bfa-4c6d-8f4e-2888409ecb75",
 254    "metadata": {},
 255    "outputs": [],
 256    "source": [
 257     "len(dat.keys())"
 258    ]
 259   },
 260   {
 261    "cell_type": "code",
 262    "execution_count": null,
 263    "id": "f7e77394-dea3-4b24-82ed-6ce3c7dab21e",
 264    "metadata": {},
 265    "outputs": [],
 266    "source": [
 267     "def filter_nan_cases(d, data_params=data_params):\n",
 268     "    \"\"\"Remove cases from `d` that have too few RAWS values relative to HRRR.\n",
 269     "\n",
 270     "    For every case the fraction 1 - len(RAWS fm) / len(HRRR time) is compared\n",
 271     "    with data_params['max_pct_na']; cases above the threshold are removed to\n",
 272     "    avoid over-interpolating.\n",
 273     "\n",
 274     "    Args:\n",
 275     "        d: dict of cases; each value provides ['RAWS']['fm'] and\n",
 276     "            ['HRRR']['time'] arrays. Modified in place.\n",
 277     "        data_params: dict providing 'max_pct_na'.\n",
 278     "\n",
 279     "    Returns:\n",
 280     "        None; `d` is modified in place.\n",
 281     "    \"\"\"\n",
 282     "    # Collect keys first and pop afterwards: a dict must not change size\n",
 283     "    # while it is being iterated.\n",
 284     "    ks = []\n",
 285     "    # Iterate the argument `d`, not the notebook-level `dat`, so the function\n",
 286     "    # works for any dictionary it is given.\n",
 287     "    for k in d:\n",
 288     "        n_fm = d[k]['RAWS']['fm'].shape[0]\n",
 289     "        n_obs = d[k]['HRRR']['time'].shape[0]\n",
 290     "        pct_na = (1 - n_fm / n_obs)\n",
 291     "        if pct_na > data_params['max_pct_na']:\n",
 292     "            print(f\"Removing key {k} due to extensive missing data. Percent NA relative to HRRR: {np.round(pct_na, 3)}\")\n",
 293     "            ks.append(k)\n",
 294     "    for k in ks:\n",
 295     "        d.pop(k, None)"
 278    ]
 279   },
 280   {
 281    "cell_type": "code",
 282    "execution_count": null,
 283    "id": "faffc4ea-1b96-4ab4-af44-e2555eba39e1",
 284    "metadata": {},
 285    "outputs": [],
 286    "source": [
 287     "filter_nan_cases(dat)"
 288    ]
 289   },
 290   {
 291    "cell_type": "code",
 292    "execution_count": null,
 293    "id": "ac4c695e-bcc6-4e53-b934-f3837dac70ca",
 294    "metadata": {},
 295    "outputs": [],
 296    "source": [
 297     "len(dat.keys())"
 298    ]
 299   },
 300   {
 301    "cell_type": "code",
 302    "execution_count": null,
 303    "id": "c3fb7817-82b4-49ee-8c62-ef7308648c02",
 304    "metadata": {},
 305    "outputs": [],
 306    "source": []
 307   },
 308   {
 309    "cell_type": "code",
 310    "execution_count": null,
 311    "id": "a9f0b53b-b291-4592-ab0a-d05a6d2f770c",
 312    "metadata": {},
 313    "outputs": [],
 314    "source": []
 315   },
 316   {
 317    "cell_type": "code",
 318    "execution_count": null,
 319    "id": "9b7af652-5836-480f-861c-09bacaf035ea",
 320    "metadata": {},
 321    "outputs": [],
 322    "source": [
 323     "d = dat['NV026_202401']"
 324    ]
 325   },
 326   {
 327    "cell_type": "code",
 328    "execution_count": null,
 329    "id": "c5d7d2d6-cbae-402c-8585-a48ceba8f431",
 330    "metadata": {},
 331    "outputs": [],
 332    "source": [
 333     "d['RAWS']['fm'].shape"
 334    ]
 335   },
 336   {
 337    "cell_type": "code",
 338    "execution_count": null,
 339    "id": "ceb44038-b166-4827-a9ab-f62f51971c39",
 340    "metadata": {},
 341    "outputs": [],
 342    "source": [
 343     "d['RAWS']['time_raws'].shape"
 344    ]
 345   },
 346   {
 347    "cell_type": "code",
 348    "execution_count": null,
 349    "id": "481bb0b6-b5de-4244-89bc-24b6073e5cd4",
 350    "metadata": {},
 351    "outputs": [],
 352    "source": [
 353     "d['HRRR']['f01']['Ed'].shape"
 354    ]
 355   },
 356   {
 357    "cell_type": "code",
 358    "execution_count": null,
 359    "id": "88d3d4b6-c482-4158-ae13-daf729c2a496",
 360    "metadata": {},
 361    "outputs": [],
 362    "source": [
 363     "d['HRRR']['time'].shape"
 364    ]
 365   },
 366   {
 367    "cell_type": "code",
 368    "execution_count": null,
 369    "id": "d834581e-eb4a-494f-894a-5c81ff92bf85",
 370    "metadata": {},
 371    "outputs": [],
 372    "source": [
 373     "# Express both time axes of one case as numeric timestamps so they can be\n",
 374     "# used as coordinates for np.interp. str2time is already imported in the\n",
 375     "# first cell of the notebook.\n",
 376     "d = dat['NV040_202401']\n",
 377     "time_raws = str2time(d['RAWS']['time_raws'])\n",
 378     "time_hrrr = str2time(d['HRRR']['time'])\n",
 379     "\n",
 380     "t1 = np.array([t.timestamp() for t in time_raws])\n",
 381     "t2 = np.array([t.timestamp() for t in time_hrrr])"
 380    ]
 381   },
 382   {
 383    "cell_type": "code",
 384    "execution_count": null,
 385    "id": "266ac8f6-119b-4424-b162-9077ab76b8e4",
 386    "metadata": {},
 387    "outputs": [],
 388    "source": [
 389     "tnew = np.interp(t2, t1, t1)"
 390    ]
 391   },
 392   {
 393    "cell_type": "code",
 394    "execution_count": null,
 395    "id": "1950759a-a332-4054-aee1-b086fbf4c39c",
 396    "metadata": {},
 397    "outputs": [],
 398    "source": [
 399     "np.isin(tnew, t1).mean()"
 400    ]
 401   },
 402   {
 403    "cell_type": "code",
 404    "execution_count": null,
 405    "id": "8b93c8ae-cfbb-4344-beda-5d3f60ed8ed9",
 406    "metadata": {},
 407    "outputs": [],
 408    "source": []
 409   },
 410   {
 411    "cell_type": "code",
 412    "execution_count": null,
 413    "id": "1c9a2263-9ec2-4c7e-9c79-90f8209a4c87",
 414    "metadata": {},
 415    "outputs": [],
 416    "source": []
 417   },
 418   {
 419    "cell_type": "code",
 420    "execution_count": null,
 421    "id": "9e2a78e5-330a-49ce-9dc9-babcb580d0d4",
 422    "metadata": {},
 423    "outputs": [],
 424    "source": [
 425     "d = dat['LIB03_202401']"
 426    ]
 427   },
 428   {
 429    "cell_type": "code",
 430    "execution_count": null,
 431    "id": "08b4c474-4332-4b45-9110-d8a21432cdf9",
 432    "metadata": {},
 433    "outputs": [],
 434    "source": [
 435     "d['RAWS']['fm'].shape"
 436    ]
 437   },
 438   {
 439    "cell_type": "code",
 440    "execution_count": null,
 441    "id": "1c12c514-37db-4b13-a35d-bd28651c00b4",
 442    "metadata": {},
 443    "outputs": [],
 444    "source": [
 445     "d['RAWS']['time_raws'].shape"
 446    ]
 447   },
 448   {
 449    "cell_type": "code",
 450    "execution_count": null,
 451    "id": "9846b2ae-38a8-4f0b-96a6-1c21d0ecb616",
 452    "metadata": {},
 453    "outputs": [],
 454    "source": [
 455     "d['HRRR']['f01']['Ed'].shape"
 456    ]
 457   },
 458   {
 459    "cell_type": "code",
 460    "execution_count": null,
 461    "id": "614fe317-8233-4dbf-97c8-c2d8d8923fea",
 462    "metadata": {},
 463    "outputs": [],
 464    "source": [
 465     "d['HRRR']['time'].shape"
 466    ]
 467   },
 468   {
 469    "cell_type": "code",
 470    "execution_count": null,
 471    "id": "40be5a62-06ec-4623-9684-bf186c3bcd1d",
 472    "metadata": {},
 473    "outputs": [],
 474    "source": []
 475   },
 476   {
 477    "cell_type": "code",
 478    "execution_count": null,
 479    "id": "081e0202-2bc0-4bd8-b4ab-f5eec289b70c",
 480    "metadata": {},
 481    "outputs": [],
 482    "source": []
 483   },
 484   {
 485    "cell_type": "code",
 486    "execution_count": null,
 487    "id": "31a1d814-9028-4213-ad9d-fabf1e35b554",
 488    "metadata": {},
 489    "outputs": [],
 490    "source": []
 491   },
 492   {
 493    "cell_type": "code",
 494    "execution_count": null,
 495    "id": "42de9659-3725-4f92-b17c-455126e89c47",
 496    "metadata": {},
 497    "outputs": [],
 498    "source": []
 499   },
 500   {
 501    "cell_type": "code",
 502    "execution_count": null,
 503    "id": "37cb45e7-b46b-429b-a9be-6c3a35677a28",
 504    "metadata": {},
 505    "outputs": [],
 506    "source": []
 507   },
 508   {
 509    "cell_type": "code",
 510    "execution_count": null,
 511    "id": "3f007941-6682-4022-9b78-48c214152635",
 512    "metadata": {},
 513    "outputs": [],
 514    "source": []
 515   },
 516   {
 517    "cell_type": "code",
 518    "execution_count": null,
 519    "id": "20687996-8267-4e93-9e24-b9515259e256",
 520    "metadata": {},
 521    "outputs": [],
 522    "source": []
 523   },
 524   {
 525    "cell_type": "code",
 526    "execution_count": null,
 527    "id": "55ce3b0e-7135-43ff-86ab-379c50b349db",
 528    "metadata": {},
 529    "outputs": [],
 530    "source": [
 531     "time_raws = str2time(dat['NV040_202401']['RAWS']['time_raws'])\n",
 532     "time_hrrr = str2time(dat['NV040_202401']['HRRR']['time'])\n",
 533     "fm = dat['NV040_202401']['RAWS']['fm']\n",
 534     "rain = dat['NV040_202401']['HRRR']['f01']['rain']"
 535    ]
 536   },
 537   {
 538    "cell_type": "code",
 539    "execution_count": null,
 540    "id": "cc4fd0b0-286b-4cd9-88c4-59e996348235",
 541    "metadata": {},
 542    "outputs": [],
 543    "source": [
 544     "len(time_raws) == len(fm)"
 545    ]
 546   },
 547   {
 548    "cell_type": "code",
 549    "execution_count": null,
 550    "id": "1680ed39-a021-4631-a764-40f354cf6a09",
 551    "metadata": {},
 552    "outputs": [],
 553    "source": [
 554     "filter_fmc(fm)"
 555    ]
 556   },
 557   {
 558    "cell_type": "code",
 559    "execution_count": null,
 560    "id": "8a3c405a-f976-4756-aaec-63998d58203d",
 561    "metadata": {},
 562    "outputs": [],
 563    "source": [
 564     "filter_rain(rain)"
 565    ]
 566   },
 567   {
 568    "cell_type": "code",
 569    "execution_count": null,
 570    "id": "5f49d9d4-2edf-434e-8812-c8c6ef3fbb2f",
 571    "metadata": {},
 572    "outputs": [],
 573    "source": [
 574     "filter_wind(dat['NV040_202401']['HRRR']['f01']['wind'])"
 575    ]
 576   },
 577   {
 578    "cell_type": "code",
 579    "execution_count": null,
 580    "id": "3d6a8fe7-1864-475a-a4f3-0fe04806ad50",
 581    "metadata": {},
 582    "outputs": [],
 583    "source": [
 584     "dat['NV040_202401']['RAWS']['wind']"
 585    ]
 586   },
 587   {
 588    "cell_type": "code",
 589    "execution_count": null,
 590    "id": "25ad1ce4-2060-4ece-932a-396813e56dc6",
 591    "metadata": {},
 592    "outputs": [],
 593    "source": [
 594     "dat['CNFC1_202401']['RAWS'].keys()"
 595    ]
 596   },
 597   {
 598    "cell_type": "code",
 599    "execution_count": null,
 600    "id": "a58ca663-074d-4eb4-a644-c9e05a415e65",
 601    "metadata": {},
 602    "outputs": [],
 603    "source": [
 604     "dat['CNFC1_202401']['RAWS']['solar']"
 605    ]
 606   },
 607   {
 608    "cell_type": "code",
 609    "execution_count": null,
 610    "id": "c2f27bfb-73fc-4bc6-9abc-ac1f5ab46acd",
 611    "metadata": {},
 612    "outputs": [],
 613    "source": [
 614     "dat['CNFC1_202401']['HRRR']['f01']['wind']"
 615    ]
 616   },
 617   {
 618    "cell_type": "code",
 619    "execution_count": null,
 620    "id": "74b98be4-55b1-4db2-81de-5f650aa00bb2",
 621    "metadata": {},
 622    "outputs": [],
 623    "source": [
 624     "dat['CNFC1_202401']['HRRR']['f01']['soilm']"
 625    ]
 626   },
 627   {
 628    "cell_type": "code",
 629    "execution_count": null,
 630    "id": "915f9534-ff84-4844-bde4-dce567d222c5",
 631    "metadata": {},
 632    "outputs": [],
 633    "source": []
 634   },
 635   {
 636    "cell_type": "code",
 637    "execution_count": null,
 638    "id": "cd2f357d-c813-4aa0-b3f7-cdc72f1f660c",
 639    "metadata": {},
 640    "outputs": [],
 641    "source": [
 642     "for k in dat:\n",
 643     "    print(\"~\"*50)\n",
 644     "    print(k)\n",
 645     "    print(f\"HRRR Shape: {dat[k]['HRRR']['f01']['Ew'].shape}\")\n",
 646     "    print(f\"RAWS Shape: {dat[k]['RAWS']['fm'].shape}\")"
 647    ]
 648   },
 649   {
 650    "cell_type": "code",
 651    "execution_count": null,
 652    "id": "1ab4cb8b-1aa8-4541-a5cc-041c493d8894",
 653    "metadata": {},
 654    "outputs": [],
 655    "source": [
 656     "dat['LIB03_202401']['RAWS']['soil_moisture']"
 657    ]
 658   },
 659   {
 660    "cell_type": "code",
 661    "execution_count": null,
 662    "id": "f62a897e-eeb8-4951-b64f-8b6ee11bc92a",
 663    "metadata": {},
 664    "outputs": [],
 665    "source": [
 666     "dat['LIB03_202401']['HRRR']['f01']['soilm']"
 667    ]
 668   },
 669   {
 670    "cell_type": "code",
 671    "execution_count": null,
 672    "id": "5037b7f3-a749-4b27-9e4c-f6c8d4fb8b6d",
 673    "metadata": {},
 674    "outputs": [],
 675    "source": [
 676     "# Apply the range filters to the RAWS observations of every case and print\n",
 677     "# the mean of each filter's 0/1 rejection vector.\n",
 678     "for k in dat:\n",
 679     "    print(\"~\"*50)\n",
 680     "    print(k)\n",
 681     "    # Filter RAWS\n",
 682     "    fm, filter_vec = filter_fmc(dat[k][\"RAWS\"]['fm'])\n",
 683     "    print(f\"Percent FMC Observations Filtered: {np.mean(filter_vec)}\")\n",
 684     "    if 'rain' in dat[k][\"RAWS\"]:\n",
 685     "        rain, filter_vec = filter_rain(dat[k][\"RAWS\"]['rain'])\n",
 686     "        print(f\"Percent Rain Observations Filtered: {np.mean(filter_vec)}\")\n",
 687     "    if 'wind' in dat[k][\"RAWS\"]:\n",
 688     "        # Wind has its own limits (min_wind / max_wind); filter_rain would\n",
 689     "        # apply the rain limits to the wind observations.\n",
 690     "        wind, filter_vec = filter_wind(dat[k][\"RAWS\"]['wind'])\n",
 691     "        print(f\"Percent Wind Observations Filtered: {np.mean(filter_vec)}\")\n",
 692     "    # Filter HRRR unnecessary?"
 689    ]
 690   },
 691   {
 692    "cell_type": "code",
 693    "execution_count": null,
 694    "id": "abd5985c-c727-44fb-a1d6-b212dc24d3ac",
 695    "metadata": {},
 696    "outputs": [],
 697    "source": []
 698   },
 699   {
 700    "cell_type": "code",
 701    "execution_count": null,
 702    "id": "3e6849f7-e983-446e-9d1b-26a1b4c693fd",
 703    "metadata": {},
 704    "outputs": [],
 705    "source": []
 706   },
 707   {
 708    "cell_type": "code",
 709    "execution_count": null,
 710    "id": "3b307ae1-980d-487a-aad4-f2fc0a87cbcf",
 711    "metadata": {},
 712    "outputs": [],
 713    "source": []
 714   },
 715   {
 716    "cell_type": "code",
 717    "execution_count": null,
 718    "id": "6d7b7335-589e-49bd-88bd-20b7b2a40611",
 719    "metadata": {},
 720    "outputs": [],
 721    "source": []
 722   },
 723   {
 724    "cell_type": "code",
 725    "execution_count": null,
 726    "id": "982de3ce-e883-4eab-88dd-41295da16e7f",
 727    "metadata": {},
 728    "outputs": [],
 729    "source": []
 730   },
 731   {
 732    "cell_type": "code",
 733    "execution_count": null,
 734    "id": "940e36a7-1597-4bac-8a82-3ecb602e124c",
 735    "metadata": {},
 736    "outputs": [],
 737    "source": []
 738   },
 739   {
 740    "cell_type": "code",
 741    "execution_count": null,
 742    "id": "b34546f9-5c28-4c29-9148-0473fa9e00c1",
 743    "metadata": {},
 744    "outputs": [],
 745    "source": []
 746   },
 747   {
 748    "cell_type": "code",
 749    "execution_count": null,
 750    "id": "81a421c8-ed47-40f2-83c2-d258bce764f4",
 751    "metadata": {},
 752    "outputs": [],
 753    "source": []
 754   },
 755   {
 756    "cell_type": "code",
 757    "execution_count": null,
 758    "id": "897f84d1-4ac8-478a-82f1-ed59052b0199",
 759    "metadata": {},
 760    "outputs": [],
 761    "source": [
 762     "def foo(file_path=\"data/test_CA_202401.pkl\"):\n",
 763     "    \"\"\"Sketch of the read -> format -> filter pipeline for one pickle file.\n",
 764     "\n",
 765     "    Args:\n",
 766     "        file_path: Pickle file handed to read_pkl. Defaults to the file the\n",
 767     "            notebook loads earlier, so foo() can still be called bare.\n",
 768     "\n",
 769     "    Returns:\n",
 770     "        Whatever filter_data returns for the formatted dictionary.\n",
 771     "    \"\"\"\n",
 772     "    # read_pkl and format_fmda_data both take one required argument; pass the\n",
 773     "    # path and the loaded dictionary through instead of calling them bare.\n",
 774     "    d = read_pkl(file_path)\n",
 775     "    dat = format_fmda_data(d)\n",
 776     "    # NOTE(review): filter_data is not defined in this notebook -- confirm\n",
 777     "    # which filter is intended before relying on this function.\n",
 778     "    dat = filter_data(dat)\n",
 779     "    return dat"
 767    ]
 768   },
 769   {
 770    "cell_type": "code",
 771    "execution_count": null,
 772    "id": "ed4112af-2b76-4b57-8d2e-1e5e0cba3b42",
 773    "metadata": {},
 774    "outputs": [],
 775    "source": [
 776     "def format_fmda_data(d):\n",
 777     "    \"\"\"Placeholder formatting step: currently returns `d` unchanged.\n",
 778     "\n",
 779     "    Args:\n",
 780     "        d: (dict) fmda dictionary, output of process in wrfxpy.\n",
 781     "\n",
 782     "    Returns:\n",
 783     "        The same object that was passed in.\n",
 784     "    \"\"\"\n",
 785     "    return d"
 781    ]
 782   },
 783   {
 784    "cell_type": "code",
 785    "execution_count": null,
 786    "id": "b0ebb758-d1ec-477f-b6af-207dbe2a01d1",
 787    "metadata": {},
 788    "outputs": [],
 789    "source": []
 790   },
 791   {
 792    "cell_type": "code",
 793    "execution_count": null,
 794    "id": "e4d704f9-cb10-4b3a-a65d-cf1a26ddb9d4",
 795    "metadata": {},
 796    "outputs": [],
 797    "source": []
 798   },
 799   {
 800    "cell_type": "code",
 801    "execution_count": null,
 802    "id": "c5972b2b-eab2-4705-8401-6aa746203ac1",
 803    "metadata": {},
 804    "outputs": [],
 805    "source": []
 806   },
 807   {
 808    "cell_type": "code",
 809    "execution_count": null,
 810    "id": "0818c399-be45-4520-9b9e-d847a73ea1bf",
 811    "metadata": {},
 812    "outputs": [],
 813    "source": []
 814   },
 815   {
 816    "cell_type": "code",
 817    "execution_count": null,
 818    "id": "94ad78d5-47a0-4c9e-abe8-9f3cfc67208f",
 819    "metadata": {},
 820    "outputs": [],
 821    "source": []
 822   }
 823  ],
 824  "metadata": {
 825   "kernelspec": {
 826    "display_name": "Python 3 (ipykernel)",
 827    "language": "python",
 828    "name": "python3"
 829   },
 830   "language_info": {
 831    "codemirror_mode": {
 832     "name": "ipython",
 833     "version": 3
 834    },
 835    "file_extension": ".py",
 836    "mimetype": "text/x-python",
 837    "name": "python",
 838    "nbconvert_exporter": "python",
 839    "pygments_lexer": "ipython3",
 840    "version": "3.10.13"
 841   }
 842  },
 843  "nbformat": 4,
 844  "nbformat_minor": 5
 845 }