5 "execution_count": null,
10 "import reproducibility"
15 "execution_count": null,
16 "id": "c7291842-a72d-4c4e-9312-6c0c31df18e0",
20 "# both can change\n",
22 "import numpy as np\n",
23 "import pandas as pd\n",
24 "import tensorflow as tf\n",
25 "from keras.models import Sequential\n",
26 "from keras.layers import Dense, SimpleRNN\n",
27 "from keras.utils.vis_utils import plot_model\n",
28 "from sklearn.preprocessing import MinMaxScaler\n",
29 "from sklearn.metrics import mean_squared_error\n",
32 "import matplotlib.pyplot as plt\n",
33 "import tensorflow as tf\n",
34 "import keras.backend as K\n",
35 "from keras.utils.vis_utils import plot_model\n",
36 "from scipy.interpolate import LinearNDInterpolator, interpn\n",
37 "from scipy.optimize import root\n",
38 "from utils import hash2\n",
40 "# Local modules for handling data and running moisture models\n",
41 "import data_funcs as datf\n",
42 "from data_funcs import format_raws, retrieve_raws, format_precip, fixnan\n",
43 "from data_funcs import raws_data, synthetic_data, plot_data, check_data, mse_data, to_json, from_json\n",
44 "from moisture_rnn import create_RNN_2, staircase, create_rnn_data, train_rnn, rnn_predict\n",
45 "import moisture_models as mod\n",
47 "meso_token=\"b40cb52cbdef43ef81329b84e8fd874f\"\n"
51 "cell_type": "markdown",
52 "id": "8320fb68-771e-4441-8849-e5bdec432bd1",
55 "## Retrieve RAWS Data"
60 "execution_count": null,
61 "id": "ce5e033c-5d16-44ec-9b58-4a55ff76d04d",
67 "# raws_dat=raws_data(start='201806010800', hours=1200, h2=300, stid=\"BKCU1\",meso_token=meso_token)\n",
68 "raws_dat=from_json('kf_orig.json')\n",
74 "execution_count": null,
79 "%matplotlib inline\n",
85 "execution_count": null,
90 "plot_data(raws_dat,hmax=600)"
95 "execution_count": null,
96 "id": "87ae5df3-b9a6-4102-83fa-99c21151ecc8",
105 "execution_count": null,
106 "id": "382c9d3d-8e11-461a-a4aa-276ae46a2cde",
110 "print('fm mean:', np.nanmean(raws_dat['fm']))\n",
111 "print('rain mean:', np.nanmean(raws_dat['rain']))\n",
112 "print('Ed mean:', np.nanmean(raws_dat['Ed']))\n",
113 "print('Ew mean:', np.nanmean(raws_dat['Ew']))\n",
114 "print('m mean:', np.nanmean(raws_dat['m']))"
118 "cell_type": "markdown",
119 "id": "b45fcc1f-8394-418f-89ab-0cfbaa04d65f",
122 "## Retrieve RTMA Function\n",
124 "<mark>Not needed?</mark>"
128 "cell_type": "markdown",
133 "Jonathon changes above create each case as a dictionary, then dictionary of dictionaries, figure out how to store and load dictionaries as a file. json is possible but: cannot contain datetime objects\n",
134 "look into pickle also compresses while json is plain text clone wrfxpy look how for idioms, pickle added jan/angel lager\n",
135 "Jan will edit from here below. \n",
136 "cases will be extracted from dictionary as global variables for now at least"
141 "execution_count": null,
146 "np.random.seed(seed=123)"
151 "execution_count": null,
156 "# dictionary raws_dat has all that is needed for the run \n",
157 "# keeping the name raws_dat for now even if it may not be raws data\n",
163 "execution_count": null,
168 "synt_dat=synthetic_data() # just testinh\n",
169 "%matplotlib inline\n",
170 "plot_data(synt_dat)"
174 "cell_type": "markdown",
175 "id": "6c42a886-ecff-4379-8a12-db9a77d64045",
178 "## Fit Augmented KF"
183 "execution_count": null,
184 "id": "fda8aa6b-a241-47e3-881f-6e75373f1a2c",
188 "m,Ec = mod.run_augmented_kf(raws_dat) # extract from state\n",
190 "raws_dat['Ec']=Ec\n",
191 "plot_data(raws_dat,title2='augmented KF')"
196 "execution_count": null,
201 "plot_data(raws_dat,hmin=0,hmax=600)"
206 "execution_count": null,
207 "id": "4d5388f2-1c21-4b4e-860f-a7ea7c7e2bbc",
211 "plot_data(raws_dat,hmin=900,hmax=1200,title2='augmented KF prediction detail')"
216 "execution_count": null,
217 "id": "ff9f0f20-b38f-4643-97cd-969914fca2dc",
221 "mse_data(raws_dat)\n"
225 "cell_type": "markdown",
226 "id": "f41a26c2-4a85-4c7e-b818-a1a2906dfb25",
234 "execution_count": null,
235 "id": "01143521-8222-4e69-9cde-7dc7c7c780e0",
239 "# Set seed for reproducibility\n",
240 "tf.random.set_seed(123)"
245 "execution_count": null,
246 "id": "cff5a394-c6f0-4af1-9890-37bf65ba0e68",
250 "case_data = from_json('rnn_orig.json')"
255 "execution_count": null,
260 "plot_data(case_data,title2=' from rnn_orig.json',hmin=0,hmax=600)"
265 "execution_count": null,
270 "plot_data(case_data,title2='RNN prediction',hmin=300,hmax=600)"
275 "execution_count": null,
280 "if 'm' in case_data:\n",
281 " mse_data(case_data) # just check sdolution if there\n",
282 " del case_data['m'] # cleanup - remove old solution if any"
287 "execution_count": null,
293 "# Set seed for reproducibility\n",
294 "tf.random.set_seed(123)\n",
295 "rnn_dat = create_rnn_data(case_data,scale=False, hours=None, h2=None, verbose=verbose)"
300 "execution_count": null,
301 "id": "eb655e2b-7288-4c69-ac4d-28079835270b",
305 "## Check 1: equilibrium input data the same\n",
307 "print(hash2(rnn_dat['Et']))\n",
308 "print(hash2(rnn_dat['x_train']))\n",
309 "print(hash2(rnn_dat['y_train']))"
314 "execution_count": null,
315 "id": "871821a9-bcd9-47db-9bd6-1933094ac137",
319 "model_predict = train_rnn(\n",
321 " rnn_dat['hours'],\n",
322 " activation=['linear','linear'],\n",
323 " hidden_units=6,\n",
325 " dense_layers=1,\n",
326 " verbose = verbose\n",
332 "execution_count": null,
333 "id": "5dc3ec60-292d-4526-a793-7d466f4ce9c7",
337 "m = rnn_predict(model_predict, rnn_dat, rnn_dat['hours'], verbose = verbose)\n",
338 "case_data['m'] = m\n",
339 "note = 'm replaced by a solution from fmda_rnn_rain'\n",
340 "if 'note' in case_data:\n",
341 " case_data['note'] = case_data['note'] + '\\n' + note\n",
343 " case_data['note'] = note\n",
344 "check_data(case_data)"
349 "execution_count": null,
350 "id": "6ccc15fc-5d08-4df1-a4d5-4f0107171c15",
354 "plot_data(case_data,title2='with trained RNN',hmin=0,hmax=600)\n"
359 "execution_count": null,
360 "id": "1d06f6ee-2c05-4473-956c-ba490cf773d2",
364 "mse_data(case_data)"
369 "execution_count": null,
374 "plot_data(case_data,title2='RNN prediction',hmin=300,hmax=600)"
378 "cell_type": "markdown",
379 "id": "4a22cf6a-dfa8-45d5-9a4a-bba9f2fb386d",
387 "cell_type": "markdown",
388 "id": "f62ad24d-7974-4f22-a797-3419e51e03f9",
391 "<mark>Start Here after Check 1<\\mark>"
396 "execution_count": null,
397 "id": "e3b68921-3745-4067-857c-9d4329d9c979",
401 "from utils import hash2"
406 "execution_count": null,
407 "id": "bfbc82c0-2117-4b2b-b063-dbde46c856fd",
411 "#tf.keras.utils.set_random_seed(123)\n",
412 "#tf.random.set_seed(123)\n",
413 "reproducibility.set_seed()"
418 "execution_count": null,
419 "id": "9b09eb58-cd1f-4146-86d4-e60260d5f035",
423 "from utils import vprint\n",
425 "hours = rnn_dat['hours']\n",
427 "samples = rnn_dat['samples']\n",
428 "features = rnn_dat['features']\n",
429 "timesteps = rnn_dat['timesteps']\n",
431 "model_fit=create_RNN_2(hidden_units=6, \n",
432 " dense_units=1, \n",
433 " batch_shape=(samples,timesteps,features),\n",
435 " return_sequences=False,\n",
436 " # initial_state=h0,\n",
437 " activation=['linear','linear'],\n",
438 " dense_layers=1)\n",
440 "from keras.utils.vis_utils import plot_model\n",
441 "plot_model(model_fit, to_file='model_plot.png', \n",
442 " show_shapes=True, show_layer_names=True)"
447 "execution_count": null,
448 "id": "030953b3-be17-40d7-b0b6-7e73e8b92869",
452 "## Check 2: Untrained RNN initialized with same weights\n",
454 "hash2(model_fit.get_weights())"
459 "execution_count": null,
460 "id": "d18e09e0-7f47-4bc5-8cee-23752ac5bd5f",
464 "Et = rnn_dat['Et']\n",
465 "model_predict=create_RNN_2(hidden_units=6, dense_units=1, \n",
466 " input_shape=(hours,features),stateful = False,\n",
467 " return_sequences=True,\n",
468 " activation=['linear','linear'],dense_layers=1)"
473 "execution_count": null,
474 "id": "625a24ef-92ab-4657-87c6-aede6b7b89b2",
478 "## Check 3: Second model initialization same weights\n",
480 "hash2(model_predict.get_weights())"
485 "execution_count": null,
486 "id": "797d5c31-9bff-4df9-83f5-cf688f3291d0",
491 "x_train = rnn_dat['x_train']\n",
492 "y_train = rnn_dat['y_train']\n",
495 "# print dimensions and set initial state\n",
496 "# print('model_fit input shape',x_train.shape,'output shape',model_fit(x_train).shape)\n",
500 "w_exact= [np.array([[1.-np.exp(-0.1)]]), np.array([[np.exp(-0.1)]]), np.array([0.]),np.array([[1.0]]),np.array([-1.*DeltaE])]\n",
502 "w_initial=[np.array([[1.-np.exp(-0.1)]]), \n",
503 " np.array([[np.exp(-0.1)]]), \n",
504 " np.array([0.]),\n",
505 " np.array([[1.0]]),\n",
506 " np.array([-1.0])]\n",
508 "print('randomization of initial weights ',r)\n",
509 "w=model_fit.get_weights()\n",
510 "for i in range(len(w)):\n",
511 " vprint('weight',i,'shape',w[i].shape,'ndim',w[i].ndim,'given',w_initial[i].shape)\n",
512 " for j in range(w[i].shape[0]):\n",
513 " if w[i].ndim==2:\n",
514 " for k in range(w[i].shape[1]):\n",
515 " w[i][j][k]=w_initial[i][0][0]/w[i].shape[0] + np.random.normal(0,1)*r\n",
517 " w[i][j]=w_initial[i][0] + np.random.normal(0,1)*r\n",
518 "model_fit.set_weights(w)"
523 "execution_count": null,
524 "id": "580caac4-ba9a-48b1-86a3-8e97357bf666",
528 "## Check 4: weights and inputs the same after this step \n",
530 "print(hash2(model_fit.get_weights()))\n",
531 "print(hash2(x_train))\n",
532 "print(hash2(y_train))"
537 "execution_count": null,
542 "print('model_fit input shape',x_train.shape,'output shape',y_train.shape)\n",
543 "print('x_train',x_train)\n",
544 "print('y_train',y_train)"
549 "execution_count": null,
554 "reproducibility.set_seed()"
559 "execution_count": null,
564 "model_fit.get_weights()"
569 "execution_count": null,
570 "id": "9de62d29-2c91-48b5-a92f-da3deae470ca",
574 "model_fit.fit(x_train, y_train, epochs=5000, verbose=2, batch_size=samples)\n",
575 "w_fitted=model_fit.get_weights()\n",
576 "for i in range(len(w)):\n",
577 " vprint('weight',i,' exact:',w_exact[i],': initial:',w_initial[i],' fitted:',w_fitted[i])\n",
579 "model_predict.set_weights(w_fitted)"
584 "execution_count": null,
585 "id": "e108b89d-0747-41bd-91fc-5716375d9d2b",
589 "## Check 5: Weights NOT the same after fitting\n",
591 "hash2(model_fit.get_weights())"
596 "execution_count": null,
601 "model_fit.get_weights()"
606 "execution_count": null,
607 "id": "2cece158-e7db-4d33-be09-03022e805b06",
611 "model_fit.get_config()"
616 "execution_count": null,
617 "id": "a7c0a46d-4fe3-4d8c-8411-f95ce2b465ce",
621 "## RNN weights repeated in odd way that looks like untrained\n",
623 "model_fit.get_weights()"
628 "execution_count": null,
629 "id": "438b3b9c-7e7a-48a0-ac22-0e3ab4460e57",
633 "# evaluate model\n",
634 "model_predict.set_weights(w_fitted)\n",
635 "x_input=np.reshape(Et,(1, hours, 2))\n",
636 "y_output = model_predict.predict(x_input)\n",
637 "print('x_input.shape=',x_input.shape,'y_output.shape=',y_output.shape)\n",
639 "m = np.reshape(y_output,hours)\n",
640 "fitted_data = case_data.copy()\n",
641 "fitted_data.update({'m':m,'title':\"First RNN forecast\"})"
646 "execution_count": null,
655 "display_name": "Python 3 (ipykernel)",
656 "language": "python",
664 "file_extension": ".py",
665 "mimetype": "text/x-python",
667 "nbconvert_exporter": "python",
668 "pygments_lexer": "ipython3",