4 "cell_type": "markdown",
5 "id": "244c2fb0-4339-476c-a2db-a641e124e25a",
8 "# v2.1 exploration: trying to make it work better"
13 "execution_count": null,
14 "id": "e6cc7920-e380-4b81-bac0-cd6840450e9a",
20 "import os.path as osp\n",
21 "import numpy as np\n",
22 "import pandas as pd\n",
23 "import tensorflow as tf\n",
24 "import matplotlib.pyplot as plt\n",
27 "sys.path.append('..')\n",
28 "import reproducibility\n",
29 "import pandas as pd\n",
30 "from utils import print_dict_summary\n",
31 "from data_funcs import rmse, process_train_dict\n",
32 "from moisture_rnn import RNNParams, RNNData, RNN, RNN_LSTM\n",
33 "from moisture_rnn_pkl import pkl2train\n",
34 "from tensorflow.keras.callbacks import Callback\n",
35 "from utils import hash2\n",
39 "from utils import logging_setup, read_yml, read_pkl, hash_ndarray, hash_weights\n",
46 "execution_count": null,
47 "id": "f58e8839-bf0e-4995-b966-c09e4df001ce",
55 "cell_type": "markdown",
56 "id": "fae67b50-f916-45a7-bcc7-61995ba39449",
63 "cell_type": "markdown",
64 "id": "6322f0bc-107d-40a5-96dc-804495085a99",
72 "execution_count": null,
73 "id": "12992b9a-407f-4131-ac61-e1dc338386bf",
77 "params = read_yml(\"params.yaml\", subkey='xgb')\n",
83 "execution_count": null,
84 "id": "f214fdf8-bb76-4912-8f8c-5d0c8c1230c2",
88 "dat = read_pkl(\"data/train.pkl\")"
93 "execution_count": null,
94 "id": "888b7805-15f6-4c09-a05b-7aed7d253f6e",
98 "cases = [*dat.keys()]"
103 "execution_count": null,
104 "id": "375055d8-c070-4639-9561-e47d3f21f1f8",
108 "rnn_dat = RNNData(dat[cases[0]], params['scaler'], params['features_list'])\n",
109 "rnn_dat.train_test_split(\n",
110 " time_fracs = [.8, .1, .1]\n",
112 "rnn_dat.scale_data()"
117 "execution_count": null,
118 "id": "e79f8dc8-5cf8-4190-b4ff-e640f61bd78b",
122 "from moisture_models import XGB, RF, LM"
127 "execution_count": null,
128 "id": "b3aeb47f-261e-4e29-9eeb-67215e5628f6",
137 "execution_count": null,
138 "id": "68a07b25-c586-4fc4-a3d5-c857354e7a2c",
142 "mod.fit(rnn_dat.X_train, rnn_dat.y_train)"
147 "execution_count": null,
148 "id": "c8f88819-0a7a-4420-abb9-56a47015a4de",
152 "preds = mod.predict(rnn_dat.X_test)"
157 "execution_count": null,
158 "id": "cb7cdf14-74d6-45e4-bc1b-7d4d47dd41ac",
162 "rmse(preds, rnn_dat.y_test)"
167 "execution_count": null,
168 "id": "74d478c7-8c01-448e-9a00-dd0e1ee8e325",
172 "plt.plot(rnn_dat.y_test)\n",
178 "execution_count": null,
179 "id": "c5441014-c39a-4414-a779-95b81e1ed6a8",
183 "params = read_yml(\"params.yaml\", subkey='rf')\n",
184 "rnn_dat = RNNData(dat[cases[10]], features_list = ['Ed', 'Ew', 'solar', 'wind', 'rain'])\n",
185 "rnn_dat.train_test_split(\n",
186 " time_fracs = [.8, .1, .1]\n",
192 "execution_count": null,
193 "id": "cafe711a-20cb-4bd3-a4bc-4995a843a021",
197 "import importlib\n",
198 "import moisture_models\n",
199 "importlib.reload(moisture_models)"
204 "execution_count": null,
205 "id": "ee45f7d6-f57f-4ff6-995a-527565565f94",
214 "execution_count": null,
215 "id": "fafe76e5-0212-4bd1-a058-535935a08780",
219 "mod2 = RF(params)\n",
220 "mod2.fit(rnn_dat.X_train, rnn_dat.y_train.flatten())\n",
221 "preds2 = mod2.predict(rnn_dat.X_test)\n",
222 "print(rmse(preds2, rnn_dat.y_test.flatten()))\n",
223 "plt.plot(rnn_dat.y_test)\n",
229 "execution_count": null,
230 "id": "c0ab4244-996c-49af-bf4a-8b0c47b0b6db",
234 "from moisture_models import RF\n",
240 "execution_count": null,
241 "id": "aa6c33fd-db35-4c77-9eee-fdb39a934959",
248 "execution_count": null,
249 "id": "c5598bfe-2d87-4d23-869e-aff127782462",
253 "params = read_yml(\"params.yaml\", subkey='lm')\n",
254 "rnn_dat = RNNData(dat[cases[10]], features_list = ['Ed', 'Ew', 'solar', 'wind', 'rain'])\n",
255 "rnn_dat.train_test_split(\n",
256 " time_fracs = [.8, .1, .1]\n",
263 "execution_count": null,
264 "id": "d828c15c-4078-4967-abff-c1fd15d4696d",
268 "mod.fit(rnn_dat.X_train, rnn_dat.y_train)\n",
269 "preds = mod.predict(rnn_dat.X_test)\n",
270 "print(rmse(preds2, rnn_dat.y_test.flatten()))"
275 "execution_count": null,
276 "id": "8496a32a-8269-4d6b-953e-7f33fe626789",
283 "execution_count": null,
284 "id": "75ce8bf3-6efb-4dc7-b895-def92f6ce6b4",
290 "cell_type": "markdown",
291 "id": "282cb651-b21f-401d-94c5-9e07530a9ba8",
299 "execution_count": null,
300 "id": "96fe971b-c6d3-45ee-94ee-e4f426735d56",
304 "params = RNNParams(read_yml(\"params.yaml\", subkey='rnn'))\n",
306 " 'features_list': ['Ed', 'Ew', 'solar', 'wind', 'rain']\n",
312 "execution_count": null,
313 "id": "5a55e8e7-1869-43fc-9bc6-09bd4f5a8d76",
317 "rnn_dat2 = RNNData(dat[cases[10]], params['scaler'], params['features_list'])\n",
318 "rnn_dat2.train_test_split(\n",
319 " time_fracs = [.8, .1, .1]\n",
321 "rnn_dat2.scale_data()\n",
322 "rnn_dat2.batch_reshape(timesteps = params['timesteps'], batch_size = params['batch_size'])"
327 "execution_count": null,
328 "id": "aaec14ac-c6a6-4fcd-ad8e-d28143b92623",
332 "reproducibility.set_seed()\n",
333 "rnn = RNN(params)\n",
334 "m, errs = rnn.run_model(rnn_dat2, plot_period=\"predict\")"
339 "execution_count": null,
340 "id": "c79ed028-ba60-4db5-9864-d3b2c01e09c3",
347 "execution_count": null,
348 "id": "3e609a7c-52ea-486e-8a9f-0192b3e41e13",
355 "execution_count": null,
356 "id": "d23be7cd-0883-46e3-a573-1e19167f0fd6",
363 "execution_count": null,
364 "id": "975ed5a5-1f5a-4def-996d-bf374096e6c7",
370 "cell_type": "markdown",
371 "id": "5ef092ff-8af1-491a-b0bf-cc3e674330e0",
374 "## Phys Initialized"
379 "execution_count": null,
380 "id": "5488628e-4552-4909-83e9-413fd6878bdd",
386 " 'dense_layers': 0,\n",
387 " 'activation': ['relu', 'relu'],\n",
388 " 'phys_initialize': False,\n",
389 " 'dropout': [0,0]\n",
395 "execution_count": null,
396 "id": "56bdf26c-07e7-4e4a-a567-af7dd0f564d9",
400 "reproducibility.set_seed()\n",
401 "rnn = RNN(params)\n",
402 "m, errs = rnn.run_model(rnn_dat)"
407 "execution_count": null,
408 "id": "01227b79-98f3-4931-bdfc-ff08afa8be5f",
412 "rnn.model_train.summary()"
417 "execution_count": null,
418 "id": "918a8bf0-638b-4b4b-82fe-c6a1965a72dd",
425 "execution_count": null,
426 "id": "0aab34c7-8a09-480a-9d3e-619f7cf82b34",
431 " 'phys_initialize': True,\n",
432 " 'scaler': None, # TODO\n",
433 " 'dense_layers': 0, # NOT including single Dense output layer which is hard-coded\n",
434 " 'activation': ['linear', 'linear'], # TODO tanh, relu the same\n",
435 " 'batch_schedule_type': None # Hopefully this isn't a necessity like before, but maybe it will help\n",
441 "execution_count": null,
442 "id": "ab549075-f71f-42ad-b36f-3d1e90247e33",
446 "rnn_dat2 = RNNData(dat[cases[10]], params['scaler'], params['features_list'])\n",
447 "rnn_dat2.train_test_split(\n",
448 " time_fracs = [.8, .1, .1]\n",
450 "rnn_dat2.batch_reshape(timesteps = params['timesteps'], batch_size = params['batch_size'])"
455 "execution_count": null,
456 "id": "195f337a-ac8a-4471-8226-94863b9385e2",
460 "import importlib\n",
461 "import moisture_rnn\n",
462 "importlib.reload(moisture_rnn)\n",
463 "from moisture_rnn import RNN, RNNData"
468 "execution_count": null,
469 "id": "9395d147-17a5-44ba-aaa2-a213ffde062b",
475 "reproducibility.set_seed()\n",
482 "execution_count": null,
483 "id": "d3eebe8a-ff12-454b-81b6-6a138924f127",
487 "m, errs = rnn.run_model(rnn_dat2)"
492 "execution_count": null,
493 "id": "bcbb0159-74c5-4f56-9d69-d85a58ddbd1a",
497 "rnn.model_predict.get_weights()"
502 "execution_count": null,
503 "id": "c25f741a-6280-4cf2-8017-e56672236fdb",
510 "execution_count": null,
511 "id": "e8ed2b03-6123-4bdf-9e26-ef2ce4951663",
515 "params['rnn_units']"
520 "execution_count": null,
521 "id": "e44302bf-af49-4140-ae31-54f7c88a6735",
526 " 'phys_initialize': True,\n",
527 " 'scaler': None, # TODO\n",
528 " 'dense_layers': 0, # NOT including single Dense output layer which is hard-coded\n",
529 " 'activation': ['relu', 'relu'], # TODO tanh, relu the same\n",
530 " 'batch_schedule_type': None # Hopefully this isn't a necessity like before, but maybe it will help\n",
536 "execution_count": null,
537 "id": "9a8ac32d-551c-43e8-988e-a3b13e6d9cd9",
541 "rnn_dat2 = RNNData(dat[cases[10]], params['scaler'], params['features_list'])\n",
542 "rnn_dat2.train_test_split(\n",
543 " time_fracs = [.8, .1, .1]\n",
545 "rnn_dat2.batch_reshape(timesteps = params['timesteps'], batch_size = params['batch_size'])"
550 "execution_count": null,
551 "id": "ff727da8-38fb-4fda-999b-f712b98de0df",
557 "reproducibility.set_seed()\n",
559 "rnn = RNN(params)\n",
560 "m, errs = rnn.run_model(rnn_dat2)"
565 "execution_count": null,
566 "id": "b165074c-ea88-4b4d-8e41-6b6f22b4d221",
573 "execution_count": null,
574 "id": "aa5cd4e6-4441-4c77-a086-e9edefbeb83b",
581 "execution_count": null,
582 "id": "7bd1e05b-5cd8-48b4-8469-4842313d6097",
589 "execution_count": null,
590 "id": "b399346d-20b8-4c97-898a-606a4be98065",
597 "execution_count": null,
598 "id": "521285e6-6b6a-4d23-b688-9eb84b8eab68",
605 "execution_count": null,
606 "id": "12c66af1-54fd-4398-8ee2-36eeb937c40d",
613 "execution_count": null,
614 "id": "eb21fb8e-05c6-4a39-bdf1-4a57067c786d",
621 "execution_count": null,
622 "id": "628a9105-ca06-44c4-ad00-13808e2f4773",
629 "execution_count": null,
630 "id": "37fdbb3a-3e83-4541-93b2-982b6d4cbe93",
637 "execution_count": null,
638 "id": "a592a4c9-cb3b-4174-8eaa-02afd00a1897",
645 "execution_count": null,
646 "id": "3832fb05-417c-4648-8e2e-7748c06b3768",
652 "cell_type": "markdown",
653 "id": "d2360aef-e9c4-4a71-922d-336e53b82537",
663 "execution_count": null,
664 "id": "71d4e441-9bf1-4d57-bb37-091553e23212",
668 "import importlib \n",
669 "import moisture_rnn\n",
670 "importlib.reload(moisture_rnn)\n",
671 "from moisture_rnn import RNN_LSTM"
676 "execution_count": null,
677 "id": "0f6ba896-e3be-4a9f-8a42-3df64aff7d63",
681 "params = read_yml(\"params.yaml\", subkey=\"lstm\")\n",
682 "params = RNNParams(params)"
687 "execution_count": null,
688 "id": "a4cf567e-d623-4e14-b578-eed88b80d04e",
692 "rnn_dat = RNNData(dat[cases[10]], params['scaler'], params['features_list'])\n",
693 "rnn_dat.train_test_split(\n",
694 " time_fracs = [.8, .1, .1]\n",
696 "rnn_dat.scale_data()\n",
697 "rnn_dat.batch_reshape(timesteps = params['timesteps'], batch_size = params['batch_size'])"
702 "execution_count": null,
703 "id": "0157a6bc-3a99-4b87-a42c-ab770d19ae37",
707 "from moisture_rnn import ResetStatesCallback, EarlyStoppingCallback\n",
708 "params.update({'epochs': 50, 'learning_rate': 0.001, 'verbose_fit': True, 'rnn_layers': 2, 'rnn_units': 20, 'dense_layers': 1, 'dense_units': 10,\n",
709 " 'activation': ['tanh', 'tanh'], 'features_list': rnn_dat.features_list,\n",
710 " 'batch_schedule_type':'exp', 'bmin': 10, 'bmax':rnn_dat.hours})\n",
711 "reproducibility.set_seed(123)\n",
712 "lstm = RNN_LSTM(params)\n",
714 "history = lstm.model_train.fit(rnn_dat.X_train, rnn_dat.y_train, \n",
715 " batch_size = params['batch_size'], epochs=params['epochs'], \n",
716 " callbacks = [ResetStatesCallback(params),\n",
717 " EarlyStoppingCallback(patience = 15)],\n",
718 " validation_data = (rnn_dat.X_val, rnn_dat.y_val))\n",
724 "execution_count": null,
725 "id": "ec95e7d4-6d57-441b-b673-f10625ee5dec",
732 "execution_count": null,
733 "id": "9b3c8d8d-ea50-44ea-8c0c-414e07cd01ac",
740 "execution_count": null,
741 "id": "03063e3c-e8f4-451d-b0cf-25bd965cd9d6",
745 "params.update({'epochs': 50, 'learning_rate': 0.001, 'verbose_fit': True, 'rnn_layers': 2, 'rnn_units': 20, 'dense_layers': 1, 'dense_units': 10,\n",
746 " 'activation': ['tanh', 'tanh'], 'features_list': rnn_dat.features_list,\n",
747 " 'batch_schedule_type':'exp', 'bmin': 10, 'bmax':rnn_dat.hours,\n",
748 " 'early_stopping_patience': 25})\n",
749 "reproducibility.set_seed(123)\n",
750 "lstm = RNN_LSTM(params)\n",
751 "m, errs = lstm.run_model(rnn_dat)"
756 "execution_count": null,
757 "id": "f60a24c6-9a67-45aa-bc5c-8818aa0ca049",
764 "execution_count": null,
765 "id": "00910bd2-f050-438c-ab3b-c793b83cb5f5",
774 "execution_count": null,
775 "id": "236b33e3-e864-4453-be16-cf07338c4105",
779 "params = RNNParams(read_yml(\"params.yaml\", subkey='lstm'))\n",
785 "execution_count": null,
786 "id": "fe2a484c-dc99-45a9-89fc-2f451bd719b5",
790 "train = read_pkl(\"data/train.pkl\")"
795 "execution_count": null,
796 "id": "07bfac87-a6d4-4dcc-8d11-adf83eafab76",
800 "from itertools import islice\n",
801 "train = {k: train[k] for k in islice(train, 100)}"
806 "execution_count": null,
807 "id": "4e26099b-f760-4047-afec-9e751d24b7a6",
811 "from data_funcs import combine_nested\n",
812 "rnn_dat_sp = RNNData(\n",
813 " combine_nested(train), # input dictionary\n",
814 " scaler=\"standard\", # data scaling type\n",
815 " features_list = params['features_list'] # features for predicting outcome\n",
819 "rnn_dat_sp.train_test_split( \n",
820 " time_fracs = [.8, .1, .1], # Percent of total time steps used for train/val/test\n",
821 " space_fracs = [.8, .1, .1] # Percent of total timeseries used for train/val/test\n",
823 "rnn_dat_sp.scale_data()\n",
825 "rnn_dat_sp.batch_reshape(\n",
826 " timesteps = params['timesteps'], # Timesteps aka sequence length for RNN input data. \n",
827 " batch_size = params['batch_size'] # Number of samples of length timesteps for a single round of grad. descent\n",
833 "execution_count": null,
834 "id": "10738795-c83b-4da3-88ba-09278caa35f8",
839 " 'loc_batch_reset': rnn_dat_sp.n_seqs # Used to reset hidden state when location changes for a given batch\n",
845 "execution_count": null,
846 "id": "9c5d45cc-bcf0-4b6c-9c51-c4c790a2d9a5",
850 "rnn_sp = RNN_LSTM(params)\n",
851 "m_sp, errs = rnn_sp.run_model(rnn_dat_sp)"
856 "execution_count": null,
857 "id": "ee332ccf-4e4a-4f66-b4d6-c079dbdb1411",
866 "execution_count": null,
867 "id": "739d4b26-641e-47b2-a90a-67cd32215d05",
875 "display_name": "Python 3 (ipykernel)",
876 "language": "python",
884 "file_extension": ".py",
885 "mimetype": "text/x-python",
887 "nbconvert_exporter": "python",
888 "pygments_lexer": "ipython3",