4 "cell_type": "markdown",
5 "id": "83b774b3-ef55-480a-b999-506676e49145",
8 "# v2.1 run RNN strategy serial by Location\n",
10 "This version of the RNN runs the model on each location separately, one at a time. Two main runs:\n",
11 "1. Run separate model at each location - training and prediction at each location independently - training mode periods 0:train_ind (was 0:h2), then prediction in test_ind:end. Validation data, if any, are from train_ind:test_ind\n",
12 "2. Run same model with multiple fitting calls 0:train_ind at different locations, compare prediction accuracy in test_ind:end for all locations.\n"
17 "execution_count": null,
18 "id": "83cc1dc4-3dcb-4325-9263-58101a3dc378",
22 "import numpy as np\n",
23 "from utils import print_dict_summary, print_first, str2time, logging_setup\n",
26 "import os.path as osp\n",
27 "from moisture_rnn_pkl import pkl2train\n",
28 "from moisture_rnn import RNNParams, RNNData, RNN, create_rnn_data2 \n",
29 "from utils import hash2, read_yml, read_pkl, retrieve_url\n",
30 "from moisture_rnn import RNN\n",
31 "import reproducibility\n",
32 "from data_funcs import rmse\n",
33 "from moisture_models import run_augmented_kf\n",
35 "import pandas as pd\n",
36 "import matplotlib.pyplot as plt\n",
42 "execution_count": null,
43 "id": "17db9b90-a931-4674-a447-5b8ffbcdc86a",
52 "execution_count": null,
53 "id": "35319c1c-7849-4b8c-8262-f5aa6656e0c7",
58 " url = \"https://demo.openwfm.org/web/data/fmda/dicts/test_CA_202401.pkl\", \n",
59 " dest_path = \"data/test_CA_202401.pkl\")"
64 "execution_count": null,
65 "id": "eabdbd9c-07d9-4bae-9851-cca79f321895",
69 "repro_file = \"data/reproducibility_dict_v2_TEST.pkl\"\n",
70 "file_names=['test_CA_202401.pkl']\n",
72 "file_paths = [osp.join(file_dir,file_name) for file_name in file_names]"
77 "execution_count": null,
78 "id": "dcca6185-e799-4dd1-8acb-87ad33c411d7",
82 "# read/write control\n",
83 "train_file='train.pkl'\n",
84 "train_create=True # if false, read\n",
91 "execution_count": null,
92 "id": "bc0a775b-b587-42ef-8576-e36dc0be3a75",
98 "repro = read_pkl(repro_file)\n",
100 "if train_create:\n",
101 " logging.info('creating the training cases from files %s',file_paths)\n",
102 " # osp.join works on windows too, joins paths using \\ or /\n",
103 " train = pkl2train(file_paths)\n",
105 " with open(train_file, 'wb') as file:\n",
106 " logging.info('Writing the rain cases into file %s',train_file)\n",
107 " pickle.dump(train, file)\n",
109 " logging.info('Reading the train cases from file %s',train_file)\n",
110 " train = read_pkl(train_file)"
115 "execution_count": null,
116 "id": "211a1c2f-ba8d-40b8-b29c-daa38af97a26",
120 "params_all = read_yml(\"params.yaml\")\n",
121 "print(params_all.keys())"
126 "execution_count": null,
127 "id": "698df86b-8550-4135-81df-45dbf503dd4e",
131 "# from module_param_sets import param_sets"
136 "execution_count": null,
137 "id": "4b0c9a9b-dd02-4251-aa4a-2acc1101e153",
141 "param_sets_keys=['rnn']\n",
142 "# cases=[list(train.keys())[0]]\n",
143 "cases=list(train.keys())[70:90]\n",
144 "# cases.remove('reproducibility')\n",
150 "execution_count": null,
151 "id": "dd22baf2-59d2-460e-8c47-b20116dd5982",
155 "logging.info('Running over parameter sets %s',param_sets_keys)\n",
156 "logging.info('Running over cases %s',cases)"
160 "cell_type": "markdown",
161 "id": "802f3eef-1702-4478-b6e3-2288a6edae24",
164 "## Run Reproducibility Case"
169 "execution_count": null,
170 "id": "69a3adb9-39fd-4c0c-9c9b-aaa2a9a3af40",
174 "params = repro['repro_info']['params']\n",
175 "print(type(params))\n",
178 "# Set up input data\n",
179 "rnn_dat = RNNData(repro, scaler = params['scaler'], features_list = params['features_list'])\n",
180 "rnn_dat.train_test_split(\n",
181 " train_frac = params['train_frac'],\n",
182 " val_frac = params['val_frac']\n",
184 "rnn_dat.scale_data()"
189 "execution_count": null,
190 "id": "855703c4-d7a9-4579-bca7-7c737a81d0de",
194 "reproducibility.set_seed(123)\n",
195 "rnn = RNN(params)\n",
196 "m, errs = rnn.run_model(rnn_dat, reproducibility_run=True)"
200 "cell_type": "markdown",
201 "id": "49e31fdd-4c14-4a81-9e2b-4c6ba94d1f83",
204 "## Separate Models by Location"
209 "execution_count": null,
210 "id": "e11e7c83-183f-48ba-abd8-a6aedff66090",
214 "# Set up output dictionaries\n",
221 "execution_count": null,
222 "id": "dc5b47bd-4fbc-44b8-b2dd-d118e068b450",
227 "for k in param_sets_keys:\n",
228 " params = RNNParams(params_all[k])\n",
229 " print(\"~\"*80)\n",
230 " print(\"Running with params:\")\n",
232 " # Increase Val Frac so no errors, TODO fix validation\n",
233 " params.update({\n",
234 " 'train_frac': .5,\n",
235 " 'val_frac': .2,\n",
236 " 'activation': ['relu', 'relu'],\n",
239 " for case in cases:\n",
240 " print(\"~\"*50)\n",
241 " logging.info('Processing case %s',case)\n",
242 " print_dict_summary(train[case])\n",
243 " # Format data & Run Model\n",
244 " # rnn_dat = create_rnn_data2(train[case], params)\n",
245 " rnn_dat = RNNData(train[case], scaler = params['scaler'], features_list = params['features_list'])\n",
246 " rnn_dat.train_test_split(\n",
247 " train_frac = params['train_frac'],\n",
248 " val_frac = params['val_frac']\n",
250 " rnn_dat.scale_data()\n",
251 " reproducibility.set_seed()\n",
252 " rnn = RNN(params)\n",
253 " m, errs = rnn.run_model(rnn_dat)\n",
254 " # Add model output to case\n",
255 " train[case]['m']=m\n",
256 " # Get RMSE Prediction Error\n",
257 " print(f\"RMSE: {errs}\")\n",
258 " outputs_rnn[case] = {'case':case, 'm': m.copy(), 'errs': errs.copy()}\n",
260 " # Run Augmented KF\n",
261 " print('Running Augmented KF')\n",
262 " train[case]['h2'] = train[case]['hours'] // 2\n",
263 " train[case]['scale_fm'] = 1\n",
264 " m, Ec = run_augmented_kf(train[case])\n",
265 " m = m*rnn_dat['scale_fm']\n",
266 " y = rnn_dat['y']*rnn_dat['scale_fm'] \n",
267 " train[case]['m'] = m\n",
268 " print(f\"KF RMSE: {rmse(m,y)}\")\n",
269 " outputs_kf[case] = {'case':case, 'm': m.copy(), 'errs': rmse(m,y)}"
274 "execution_count": null,
275 "id": "15384e4d-b8ec-4700-bdc2-83b0433d11c9",
279 "logging.info('fmda_rnn_serial.ipynb done')"
284 "execution_count": null,
285 "id": "d0e78fb3-b501-49d6-81a9-1a13da0134a0",
289 "import importlib\n",
290 "import moisture_rnn\n",
291 "importlib.reload(moisture_rnn)\n",
292 "from moisture_rnn import RNN"
297 "execution_count": null,
298 "id": "37053436-8dfe-4c40-8614-811817e83782",
302 "for k in outputs_rnn:\n",
303 " print(\"~\"*50)\n",
304 " print(outputs_rnn[k]['case'])\n",
305 " print(outputs_rnn[k]['errs']['prediction'])"
310 "execution_count": null,
311 "id": "9154d5f7-015f-4ef7-af45-020410a1ea65",
315 "for k in outputs_kf:\n",
316 " print(\"~\"*50)\n",
317 " print(outputs_kf[k]['case'])\n",
318 " print(outputs_kf[k]['errs'])"
323 "execution_count": null,
324 "id": "fe407f61-15f2-4086-a386-7d7a5bb90d26",
331 "execution_count": null,
332 "id": "2fdb63b3-68b8-4877-a7a2-f63257cb29d5",
339 "execution_count": null,
340 "id": "5c7563c5-a880-45c7-8381-8ce4e1a44216",
347 "execution_count": null,
348 "id": "ad5dae6c-1269-4674-a49e-2efe8b956911",
356 "display_name": "Python 3 (ipykernel)",
357 "language": "python",
365 "file_extension": ".py",
366 "mimetype": "text/x-python",
368 "nbconvert_exporter": "python",
369 "pygments_lexer": "ipython3",