4 "cell_type": "markdown",
5 "id": "244c2fb0-4339-476c-a2db-a641e124e25a",
8 "# v2.1 exploration: trying to make the RNN work better"
13 "execution_count": null,
14 "id": "e6cc7920-e380-4b81-bac0-cd6840450e9a",
20 "import os.path as osp\n",
21 "import numpy as np\n",
22 "import pandas as pd\n",
23 "import tensorflow as tf\n",
24 "import matplotlib.pyplot as plt\n",
27 "sys.path.append('..')\n",
28 "import reproducibility\n",
29 "import pandas as pd\n",
30 "from utils import print_dict_summary\n",
31 "from data_funcs import rmse\n",
32 "from moisture_rnn import RNNParams, RNNData, RNN, RNN_LSTM, create_rnn_data2\n",
33 "from moisture_rnn_pkl import pkl2train\n",
34 "from tensorflow.keras.callbacks import Callback\n",
35 "from utils import hash2\n",
39 "from utils import logging_setup, read_yml, read_pkl, hash_ndarray, hash_weights\n",
46 "execution_count": null,
47 "id": "f58e8839-bf0e-4995-b966-c09e4df001ce",
55 "cell_type": "markdown",
56 "id": "b8fe1011-a0cc-46a4-98b7-d82c2b22f5b0",
64 "execution_count": null,
65 "id": "0df1c817-d422-4cfa-a4c5-b02549cdaffa",
71 "train = read_pkl('train.pkl')\n",
77 "execution_count": null,
78 "id": "4f2623ea-3504-446e-8243-f93ccce6b62e",
83 "import moisture_rnn\n",
84 "importlib.reload(moisture_rnn)\n",
85 "from moisture_rnn import RNN, RNNData"
90 "execution_count": null,
91 "id": "948138a6-1854-428c-b5ec-75e87c9c50e7",
95 "params = read_yml(\"params.yaml\", subkey=\"rnn\")\n",
96 "params = RNNParams(params)\n",
97 "rnn_dat = RNNData(train['PLFI1_202401'], scaler=params['scaler'], features_list = params['features_list'])\n",
98 "rnn_dat.train_test_split(\n",
99 " train_frac = .9,\n",
102 "rnn_dat.scale_data()\n",
103 "rnn_dat.batch_reshape(timesteps = params['timesteps'], batch_size = params['batch_size'])"
108 "execution_count": null,
109 "id": "78255617-5511-4e54-a022-8dba9946bbe2",
113 "reproducibility.set_seed()\n",
114 "params.update({'batch_schedule_type': 'exp', 'bmin': 20, 'bmax': rnn_dat.hours})\n",
115 "rnn = RNN(params)\n",
116 "m, errs = rnn.run_model(rnn_dat, plot_period=\"predict\")"
121 "execution_count": null,
122 "id": "d3a32de3-9556-491a-9bf1-3d252762f2b7",
129 "execution_count": null,
130 "id": "2a7d607c-8f29-4a18-948b-4d939ebd5a34",
137 "execution_count": null,
138 "id": "552c6e02-4a2d-4f50-9d6a-7e11bdbcfffc",
145 "execution_count": null,
146 "id": "5cfd0dbe-8e7d-4d9e-a21c-9001a498084c",
150 "params.update({'epochs': 2, 'verbose_fit': True, 'batch_size': 32, \n",
151 " 'rnn_layers': 2, 'activation':['relu', 'relu']})\n",
152 "rnn_dat = RNNData(train['PLFI1_202401'], scaler=params['scaler'], features_list = params['features_list'])\n",
153 "rnn_dat.train_test_split(\n",
154 " train_frac = .9,\n",
157 "rnn_dat.scale_data()\n",
158 "rnn_dat.batch_reshape(timesteps = params['timesteps'], batch_size = params['batch_size'])\n",
159 "reproducibility.set_seed()\n",
160 "rnn = RNN(params)\n",
161 "m, errs = rnn.run_model(rnn_dat, plot_period=\"predict\")"
166 "execution_count": null,
167 "id": "db2abad4-16d4-4afc-a0d8-b2dec6b872c2",
174 "execution_count": null,
175 "id": "74158e6e-c84f-4a90-9f0a-c35cb711d9ed",
182 "execution_count": null,
183 "id": "c022cce2-8863-43f4-96e8-c604ba2fe8bc",
190 "execution_count": null,
191 "id": "81037c9e-088f-4dd2-bbfa-5b9b2d044d80",
198 "execution_count": null,
199 "id": "9aaaa2ff-6757-48a6-b03b-568d2b0d01b0",
206 "execution_count": null,
207 "id": "ca8637d2-d111-4054-b174-ef913f0d9206",
214 "execution_count": null,
215 "id": "12217194-a9be-49bc-99ef-6b2a107ab4f3",
222 "execution_count": null,
223 "id": "7c3c428e-c628-4712-bdc7-15dce26837dd",
230 "execution_count": null,
231 "id": "84fe9438-2f2e-483c-a5a4-9dd3a61f50aa",
238 "execution_count": null,
239 "id": "77d90197-2b50-4621-9eee-def323ed836e",
246 "execution_count": null,
247 "id": "e492ae99-ea1f-4185-9a33-41573263f2f1",
254 "execution_count": null,
255 "id": "56b18e34-b50b-48a6-947a-5714b65e85cf",
262 "xgrid = np.arange(0, ep)\n",
263 "plt.plot(xgrid, calc_exp_intervals(bmin, bmax, ep))\n",
264 "plt.plot(xgrid, calc_log_intervals(bmin, bmax, ep))"
269 "execution_count": null,
270 "id": "8ae6b9d7-f108-4071-a9fc-1b7a32b26d75",
277 "execution_count": null,
278 "id": "7be5f53c-6130-4d94-986b-e9b5dce7fdae",
285 "execution_count": null,
286 "id": "b0b0a959-f07c-4b62-bee2-faa993320dda",
293 "execution_count": null,
294 "id": "23d94e44-cff1-4a2a-9a0e-039bf401d4e3",
301 "execution_count": null,
302 "id": "98382367-820a-4aad-97da-3ea2bc895b0f",
309 "execution_count": null,
310 "id": "185c6f90-fe7f-4b05-b5ef-20635051f18b",
317 "execution_count": null,
318 "id": "5520cd04-72b3-4550-bc54-ad78cbf77ec0",
325 "execution_count": null,
326 "id": "fd0a4fa9-603f-4662-ad4e-14df33337441",
333 "execution_count": null,
334 "id": "253c667a-748d-48cc-8479-50616e043609",
341 "execution_count": null,
342 "id": "90cafb80-72e9-4610-8413-ca407f03dbd0",
349 "execution_count": null,
350 "id": "9ebf0ccd-554f-4a0a-87e9-36de0a62a34c",
357 "execution_count": null,
358 "id": "23d8d51b-6206-471a-a792-a85f4ad89637",
365 "execution_count": null,
366 "id": "d25faf9f-d00f-44a6-a43f-e4a3277aad78",
373 "execution_count": null,
374 "id": "e8c47be0-92f6-454f-9e04-385c3cb41831",
381 "execution_count": null,
382 "id": "c8d197e6-1959-4a6f-8d2d-c67a97a123f9",
389 "execution_count": null,
390 "id": "3aa5aece-79c5-42b7-98a2-3f8a3cfb8e29",
397 "execution_count": null,
398 "id": "a9f8a650-330f-493e-a158-a683e2fd872d",
404 "cell_type": "markdown",
405 "id": "b62f4360-e9d1-4510-bb5d-1d79a3a5ac75",
408 "## Test Spatial Data"
413 "execution_count": null,
414 "id": "3a04c2d3-3bf1-451d-88bc-7b1e8701cb52",
418 "train = read_pkl('train.pkl')"
423 "execution_count": null,
424 "id": "3d416f92-995a-427f-b76d-a6125061ee98",
428 "params = read_yml(\"params.yaml\", subkey=\"rnn\")\n",
429 "params = RNNParams(params)"
434 "execution_count": null,
435 "id": "1d76cd6e-2e0e-40ae-9a58-3ed62217a33d",
444 "execution_count": null,
445 "id": "4735052a-f046-4d52-8666-ce14e4a0e276",
449 "from itertools import islice\n",
450 "dat = {k: train[k] for k in islice(train, 100)}"
455 "execution_count": null,
456 "id": "194f815f-c889-43ed-b0e8-853b1c4a8a81",
465 "execution_count": null,
466 "id": "99819622-555b-4027-a644-5f75c76f7fbc",
470 "from data_funcs import combine_nested\n",
471 "dd = combine_nested(dat)"
476 "execution_count": null,
477 "id": "4a647e8a-61f3-4e5b-abb3-44d43d7e0844",
481 "import importlib\n",
483 "importlib.reload(utils)\n",
484 "from utils import Dict"
489 "execution_count": null,
490 "id": "6335deef-ea25-40bd-8a68-842af80cebe8",
499 "execution_count": null,
500 "id": "e86c9e4d-4ccd-4d9d-92e1-2e4299549fa4",
504 "import importlib\n",
505 "import moisture_rnn\n",
506 "importlib.reload(moisture_rnn)\n",
507 "from moisture_rnn import RNNData"
512 "execution_count": null,
513 "id": "966c3559-740d-44d3-b98d-cc2efe63afcd",
517 "rnn_dat = RNNData(dd, scaler=\"standard\", features_list = ['Ed', 'Ew', 'rain'])\n",
518 "rnn_dat.train_test_split( \n",
519 " train_frac = .9,\n",
526 "execution_count": null,
527 "id": "72289573-56a1-45ca-8551-b24c4c073bfd",
531 "rnn_dat.scale_data()"
536 "execution_count": null,
537 "id": "b35a8e1a-a161-42af-a595-2e1bae0fd0ba",
541 "rnn_dat.batch_reshape(timesteps = params['timesteps'], batch_size = params['batch_size'])"
546 "execution_count": null,
547 "id": "c1520e93-0df1-41d1-98b3-f50edbe13b66",
554 "execution_count": null,
555 "id": "fa14f0ab-07ff-4c67-bdcb-ff225610ffa2",
559 "import importlib\n",
560 "import moisture_rnn\n",
561 "importlib.reload(moisture_rnn)\n",
562 "from moisture_rnn import RNN"
567 "execution_count": null,
568 "id": "94f2030f-dbc9-4a6c-8e98-b932fe7691c7",
572 "from moisture_rnn import ResetStatesCallback, EarlyStoppingCallback\n",
573 "params.update({'epochs': 20, 'learning_rate': 0.0001, 'verbose_fit': True, 'rnn_layers': 2, 'rnn_units': 20, 'dense_layers': 1, 'dense_units': 10,\n",
574 " 'activation': ['relu', 'relu'], 'features_list': ['Ed', 'Ew', 'rain']})\n",
575 "reproducibility.set_seed(123)\n",
576 "rnn = RNN(params)\n",
578 "history = rnn.model_train.fit(rnn_dat.X_train, rnn_dat.y_train, \n",
579 " batch_size = params['batch_size'], epochs=params['epochs'], \n",
580 " callbacks = [ResetStatesCallback(params),\n",
581 " EarlyStoppingCallback(patience = params['early_stopping_patience'])],\n",
582 " validation_data = (rnn_dat.X_val, rnn_dat.y_val))\n",
588 "execution_count": null,
589 "id": "32c39f16-6d80-44ef-b58a-be12869cd638",
594 "plt.semilogy(history.history['loss'], label='Training loss')\n",
595 "if 'val_loss' in history.history:\n",
596 " plt.semilogy(history.history['val_loss'], label='Validation loss')\n",
597 "plt.ylabel('Loss')\n",
598 "plt.xlabel('Epoch')\n",
599 "plt.legend(loc='upper left')\n",
605 "execution_count": null,
606 "id": "b5a88957-c7c7-4036-85bd-94cc4aa5c08c",
610 "vpreds = rnn.model_train.predict(rnn_dat.X_val)"
615 "execution_count": null,
616 "id": "f4f1ad13-6d23-4c3a-80f8-32d4cd7a9902",
625 "execution_count": null,
626 "id": "4e3e62bd-36f9-4ce5-befd-cecf01f13bd1",
630 "rnn_dat.y_val.shape"
635 "execution_count": null,
636 "id": "308ec7c9-a73b-4405-912a-454811a413ac",
640 "from sklearn.metrics import mean_squared_error"
645 "execution_count": null,
646 "id": "2d0cd42b-ffbd-413a-a73b-a622865c1b61",
650 "mean_squared_error(rnn_dat.y_val, vpreds)"
655 "execution_count": null,
656 "id": "1983a28d-f6b8-4a35-94f0-022c5ef898d2",
660 "loss = tf.keras.losses.mse(rnn_dat.y_val, vpreds)\n",
661 "loss = tf.reduce_mean(loss).numpy()\n",
667 "execution_count": null,
668 "id": "28183109-605e-4392-b5ab-df79776f023a",
672 "plt.scatter(vpreds, rnn_dat.y_val)"
677 "execution_count": null,
678 "id": "c5335e7b-1d3f-4d76-8452-85b039b386ef",
682 "hash_weights(rnn.model_train)"
687 "execution_count": null,
688 "id": "1b4464b2-5ea2-4c1c-b092-9a478d5fffe4",
692 "rnn.model_predict.set_weights(rnn.model_train.get_weights())"
697 "execution_count": null,
698 "id": "c2a681d1-3402-4053-aec5-ecc1a94237b8",
702 "hash_weights(rnn.model_predict)"
707 "execution_count": null,
708 "id": "e94d82d8-5738-4d66-897c-3ac68036ec95",
715 "execution_count": null,
716 "id": "cc74db07-84ed-4e33-949c-5e0548e98007",
723 "execution_count": null,
724 "id": "df1309f1-6eac-4e07-b1bb-3c1a4cc9c5bf",
731 "execution_count": null,
732 "id": "0166e780-3379-414b-b403-86bca3c36661",
736 "preds = rnn.predict(rnn_dat.X_test[0])"
741 "execution_count": null,
742 "id": "f878b950-b750-43db-b5e5-32723f9d0f07",
746 "plt.plot(rnn_dat.y_test[0])\n",
752 "execution_count": null,
753 "id": "3aa8dd59-12f0-46ec-b422-dcd23d8076bf",
760 "execution_count": null,
761 "id": "17e09a7a-19ac-4e82-af54-dcced5791669",
768 "execution_count": null,
769 "id": "33072593-30a4-49a0-8372-67012e1213eb",
776 "execution_count": null,
777 "id": "090b36f5-fd34-4cc7-a84d-f8dd0d582000",
784 "execution_count": null,
785 "id": "7deda359-1e7f-447a-97b7-576b98712a74",
789 "from moisture_rnn import ResetStatesCallback, EarlyStoppingCallback\n",
790 "params.update({'epochs': 20, 'learning_rate': 0.0001, 'verbose_fit': True, 'rnn_layers': 2, 'rnn_units': 20, 'dense_layers': 1, 'dense_units': 10,\n",
791 " 'activation': ['relu', 'relu'], 'features_list': ['Ed', 'Ew', 'rain']})\n",
792 "reproducibility.set_seed(123)\n",
798 "execution_count": null,
799 "id": "37d7f239-4d47-46d0-b891-b2a8d9da8c4e",
803 "m, errs = rnn.run_model(rnn_dat, plot_period=\"predict\")"
808 "execution_count": null,
809 "id": "150754f6-9927-4188-969a-d253bb0a5b22",
818 "execution_count": null,
819 "id": "8592cce5-77fe-4804-8df2-de92f058d11f",
823 "len(rnn_dat.X_test)"
828 "execution_count": null,
829 "id": "3047a7a3-32c2-4af0-aff9-bebdb1a877c1",
833 "preds0 = rnn.predict(rnn_dat.X_test[0])"
838 "execution_count": null,
839 "id": "2880b410-35a2-4d6c-ac28-2e2f366ec3a2",
843 "rmse(preds0, rnn_dat.y_test[0])"
848 "execution_count": null,
849 "id": "87623222-5fba-4833-8873-01933e9aba88",
853 "plt.plot(rnn_dat.y_test[0])\n",
859 "execution_count": null,
860 "id": "7f922046-e74f-424e-aa1c-d6d4b2eb3a46",
867 "execution_count": null,
868 "id": "47a098c2-28c3-483d-b062-da1d534f7766",
875 "execution_count": null,
876 "id": "4a581630-2dc0-4cdb-8647-c81d41e149bc",
883 "execution_count": null,
884 "id": "36b931d4-15dc-41a8-8748-610a2406ccad",
891 "execution_count": null,
892 "id": "055d98f5-4028-4822-b409-b03d437490da",
899 "execution_count": null,
900 "id": "beb357ab-16dc-4c91-a121-6dfc509f4ff6",
907 "execution_count": null,
908 "id": "a319b314-b156-47af-8541-f97145352e5c",
915 "execution_count": null,
916 "id": "b6922358-b824-4c77-abe4-c9b605a78738",
922 "cell_type": "markdown",
923 "id": "d2360aef-e9c4-4a71-922d-336e53b82537",
933 "execution_count": null,
934 "id": "71d4e441-9bf1-4d57-bb37-091553e23212",
938 "import importlib \n",
939 "import moisture_rnn\n",
940 "importlib.reload(moisture_rnn)\n",
941 "from moisture_rnn import RNN_LSTM"
946 "execution_count": null,
947 "id": "0f6ba896-e3be-4a9f-8a42-3df64aff7d63",
951 "params = read_yml(\"params.yaml\", subkey=\"lstm\")\n",
952 "params = RNNParams(params)"
957 "execution_count": null,
958 "id": "0157a6bc-3a99-4b87-a42c-ab770d19ae37",
962 "from moisture_rnn import ResetStatesCallback, EarlyStoppingCallback\n",
963 "params.update({'epochs': 20, 'learning_rate': 0.0001, 'verbose_fit': True, 'rnn_layers': 2, 'rnn_units': 20, 'dense_layers': 1, 'dense_units': 10,\n",
964 " 'activation': ['relu', 'relu'], 'features_list': ['Ed', 'Ew', 'rain']})\n",
965 "reproducibility.set_seed(123)\n",
966 "lstm = RNN_LSTM(params)\n",
968 "history = lstm.model_train.fit(rnn_dat.X_train, rnn_dat.y_train, \n",
969 " batch_size = params['batch_size'], epochs=params['epochs'], \n",
970 " callbacks = [ResetStatesCallback(params),\n",
971 " EarlyStoppingCallback(patience = params['early_stopping_patience'])],\n",
972 " validation_data = (rnn_dat.X_val, rnn_dat.y_val))\n",
978 "execution_count": null,
979 "id": "de0c00e7-838f-41b6-9cc5-70594656d155",
986 "execution_count": null,
987 "id": "430a2224-6798-48fa-b198-a32800f88f66",
994 "execution_count": null,
995 "id": "ec95e7d4-6d57-441b-b673-f10625ee5dec",
1001 "cell_type": "code",
1002 "execution_count": null,
1003 "id": "9b3c8d8d-ea50-44ea-8c0c-414e07cd01ac",
1009 "cell_type": "code",
1010 "execution_count": null,
1011 "id": "03063e3c-e8f4-451d-b0cf-25bd965cd9d6",
1017 "cell_type": "code",
1018 "execution_count": null,
1019 "id": "f60a24c6-9a67-45aa-bc5c-8818aa0ca049",
1027 "display_name": "Python 3 (ipykernel)",
1028 "language": "python",
1032 "codemirror_mode": {
1036 "file_extension": ".py",
1037 "mimetype": "text/x-python",
1039 "nbconvert_exporter": "python",
1040 "pygments_lexer": "ipython3",