2  "cells": [
3   {
4    "cell_type": "code",
5    "execution_count": null,
6    "id": "d70c330d",
7    "metadata": {},
8    "outputs": [],
9    "source": [
10     "import reproducibility"
11    ]
12   },
13   {
14    "cell_type": "code",
15    "execution_count": null,
16    "id": "c7291842-a72d-4c4e-9312-6c0c31df18e0",
17    "metadata": {},
18    "outputs": [],
19    "source": [
20     "# both can change\n",
21     "# Environment\n",
22     "import numpy as np\n",
23     "import pandas as pd\n",
24     "import tensorflow as tf\n",
25     "from keras.models import Sequential\n",
26     "from keras.layers import Dense, SimpleRNN\n",
27     "from keras.utils.vis_utils import plot_model\n",
28     "from sklearn.preprocessing import MinMaxScaler\n",
29     "from sklearn.metrics import mean_squared_error\n",
30     "import math\n",
31     "import json\n",
32     "import matplotlib.pyplot as plt\n",
33     "import tensorflow as tf\n",
34     "import keras.backend as K\n",
35     "from keras.utils.vis_utils import plot_model\n",
36     "from scipy.interpolate import LinearNDInterpolator, interpn\n",
37     "from scipy.optimize import root\n",
38     "from utils import hash2\n",
39     "\n",
40     "# Local modules for handling data and running moisture models\n",
41     "import data_funcs as datf\n",
42     "from data_funcs import format_raws, retrieve_raws, format_precip, fixnan\n",
43     "from data_funcs import raws_data, synthetic_data, plot_data, check_data, mse_data, to_json, from_json\n",
44     "from moisture_rnn import create_RNN_2, staircase, create_rnn_data, train_rnn, rnn_predict\n",
45     "import moisture_models as mod\n",
46     "\n",
47     "meso_token=\"b40cb52cbdef43ef81329b84e8fd874f\"\n"
48    ]
49   },
  {
   "cell_type": "markdown",
   "id": "8320fb68-771e-4441-8849-e5bdec432bd1",
   "metadata": {},
   "source": [
    "## Retrieve RAWS Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ce5e033c-5d16-44ec-9b58-4a55ff76d04d",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# raws_dat=raws_data(start='201806010800', hours=1200, h2=300, stid=\"BKCU1\",meso_token=meso_token)\n",
    "raws_dat=from_json('kf_orig.json')\n",
    "# hash2(raws_dat)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8c1766b6",
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "plot_data(raws_dat)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c655b1b9",
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_data(raws_dat,hmax=600)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "87ae5df3-b9a6-4102-83fa-99c21151ecc8",
   "metadata": {},
   "outputs": [],
   "source": [
    "raws_dat.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "382c9d3d-8e11-461a-a4aa-276ae46a2cde",
   "metadata": {},
   "outputs": [],
   "source": [
    "print('fm mean:', np.nanmean(raws_dat['fm']))\n",
    "print('rain mean:', np.nanmean(raws_dat['rain']))\n",
    "print('Ed mean:', np.nanmean(raws_dat['Ed']))\n",
    "print('Ew mean:', np.nanmean(raws_dat['Ew']))\n",
    "print('m mean:', np.nanmean(raws_dat['m']))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b45fcc1f-8394-418f-89ab-0cfbaa04d65f",
   "metadata": {},
   "source": [
    "## Retrieve RTMA Function\n",
    "\n",
    "<mark>Not needed?</mark>"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c7b64034",
   "metadata": {},
   "source": [
    "## Interface\n",
    "Jonathon changes above; Jan will edit from here below.\n",
    "\n",
    "Create each case as a dictionary, then a dictionary of dictionaries, and figure out how to store and load the dictionaries as a file. JSON is possible but cannot contain datetime objects; look into pickle, which is binary and can be compressed, while JSON is plain text. Clone wrfxpy and look at how it handles this for idioms; pickle to be added by Jan/Angel later. A storage sketch follows below.\n",
    "\n",
    "Cases will be extracted from the dictionary as global variables, for now at least."
   ]
  },
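  {
   "cell_type": "markdown",
   "id": "storage-sketch-md",
   "metadata": {},
   "source": [
    "A minimal sketch of the storage idea discussed above, using only the standard-library `json` and `pickle` modules. This is an added illustration, not part of the existing workflow; the file names are placeholders, and the project helpers `to_json`/`from_json` remain the ones actually used in this notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "storage-sketch-code",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Illustrative sketch only: saving/loading a case dictionary.\n",
    "# File names are placeholders; nothing below is required by the rest of the notebook.\n",
    "import json, pickle\n",
    "\n",
    "def save_case_pickle(case, path='case.pickle'):\n",
    "    # pickle stores datetime objects and numpy arrays directly (binary format)\n",
    "    with open(path, 'wb') as f:\n",
    "        pickle.dump(case, f)\n",
    "\n",
    "def load_case_pickle(path='case.pickle'):\n",
    "    with open(path, 'rb') as f:\n",
    "        return pickle.load(f)\n",
    "\n",
    "def save_case_json(case, path='case.json'):\n",
    "    # JSON is plain text; datetime and array values are stringified here (lossy)\n",
    "    with open(path, 'w') as f:\n",
    "        json.dump(case, f, default=str)"
   ]
  },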
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b1614583",
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(seed=123)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "521b453c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# dictionary raws_dat has all that is needed for the run\n",
    "# keeping the name raws_dat for now even if it may not be RAWS data\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "df5a0e60",
   "metadata": {},
   "outputs": [],
   "source": [
    "synt_dat=synthetic_data()  # just testing\n",
    "%matplotlib inline\n",
    "plot_data(synt_dat)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6c42a886-ecff-4379-8a12-db9a77d64045",
   "metadata": {},
   "source": [
    "## Fit Augmented KF"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fda8aa6b-a241-47e3-881f-6e75373f1a2c",
   "metadata": {},
   "outputs": [],
   "source": [
    "m,Ec = mod.run_augmented_kf(raws_dat)  # extract from state\n",
    "raws_dat['m']=m\n",
    "raws_dat['Ec']=Ec\n",
    "plot_data(raws_dat,title2='augmented KF')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e98780d3",
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_data(raws_dat,hmin=0,hmax=600)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4d5388f2-1c21-4b4e-860f-a7ea7c7e2bbc",
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_data(raws_dat,hmin=900,hmax=1200,title2='augmented KF prediction detail')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ff9f0f20-b38f-4643-97cd-969914fca2dc",
   "metadata": {},
   "outputs": [],
   "source": [
    "mse_data(raws_dat)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f41a26c2-4a85-4c7e-b818-a1a2906dfb25",
   "metadata": {},
   "source": [
    "## Fit RNN Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "01143521-8222-4e69-9cde-7dc7c7c780e0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set seed for reproducibility\n",
    "tf.random.set_seed(123)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cff5a394-c6f0-4af1-9890-37bf65ba0e68",
   "metadata": {},
   "outputs": [],
   "source": [
    "case_data = from_json('rnn_orig.json')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1fcfc8e5",
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_data(case_data,title2=' from rnn_orig.json',hmin=0,hmax=600)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "249b93d6",
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_data(case_data,title2='RNN prediction',hmin=300,hmax=600)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "46b34ba2",
   "metadata": {},
   "outputs": [],
   "source": [
    "if 'm' in case_data:\n",
    "    mse_data(case_data)  # just check the old solution if present\n",
    "    del case_data['m']   # cleanup - remove old solution if any"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "58615e60",
   "metadata": {},
   "outputs": [],
   "source": [
    "verbose = False\n",
    "# Set seed for reproducibility\n",
    "tf.random.set_seed(123)\n",
    "rnn_dat = create_rnn_data(case_data,scale=False, hours=None, h2=None, verbose=verbose)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eb655e2b-7288-4c69-ac4d-28079835270b",
   "metadata": {},
   "outputs": [],
   "source": [
    "## Check 1: equilibrium input data the same\n",
    "\n",
    "print(hash2(rnn_dat['Et']))\n",
    "print(hash2(rnn_dat['x_train']))\n",
    "print(hash2(rnn_dat['y_train']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "871821a9-bcd9-47db-9bd6-1933094ac137",
   "metadata": {},
   "outputs": [],
   "source": [
    "model_predict = train_rnn(\n",
    "    rnn_dat,\n",
    "    rnn_dat['hours'],\n",
    "    activation=['linear','linear'],\n",
    "    hidden_units=6,\n",
    "    dense_units=1,\n",
    "    dense_layers=1,\n",
    "    verbose = verbose\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5dc3ec60-292d-4526-a793-7d466f4ce9c7",
   "metadata": {},
   "outputs": [],
   "source": [
    "m = rnn_predict(model_predict, rnn_dat, rnn_dat['hours'], verbose = verbose)\n",
    "case_data['m'] = m\n",
    "note = 'm replaced by a solution from fmda_rnn_rain'\n",
    "if 'note' in case_data:\n",
    "    case_data['note'] = case_data['note'] + '\\n' + note\n",
    "else:\n",
    "    case_data['note'] = note\n",
    "check_data(case_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6ccc15fc-5d08-4df1-a4d5-4f0107171c15",
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_data(case_data,title2='with trained RNN',hmin=0,hmax=600)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1d06f6ee-2c05-4473-956c-ba490cf773d2",
   "metadata": {},
   "outputs": [],
   "source": [
    "mse_data(case_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4e1668f8",
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_data(case_data,title2='RNN prediction',hmin=300,hmax=600)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4a22cf6a-dfa8-45d5-9a4a-bba9f2fb386d",
   "metadata": {},
   "source": [
    "---\n",
    "---"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f62ad24d-7974-4f22-a797-3419e51e03f9",
   "metadata": {},
   "source": [
    "<mark>Start Here after Check 1</mark>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e3b68921-3745-4067-857c-9d4329d9c979",
   "metadata": {},
   "outputs": [],
   "source": [
    "from utils import hash2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bfbc82c0-2117-4b2b-b063-dbde46c856fd",
   "metadata": {},
   "outputs": [],
   "source": [
    "#tf.keras.utils.set_random_seed(123)\n",
    "#tf.random.set_seed(123)\n",
    "reproducibility.set_seed()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9b09eb58-cd1f-4146-86d4-e60260d5f035",
   "metadata": {},
   "outputs": [],
   "source": [
    "from utils import vprint\n",
    "\n",
    "hours = rnn_dat['hours']\n",
    "\n",
    "samples = rnn_dat['samples']\n",
    "features = rnn_dat['features']\n",
    "timesteps = rnn_dat['timesteps']\n",
    "\n",
    "model_fit=create_RNN_2(hidden_units=6,\n",
    "                        dense_units=1,\n",
    "                        batch_shape=(samples,timesteps,features),\n",
    "                        stateful=True,\n",
    "                        return_sequences=False,\n",
    "                        # initial_state=h0,\n",
    "                        activation=['linear','linear'],\n",
    "                        dense_layers=1)\n",
    "\n",
    "from keras.utils.vis_utils import plot_model\n",
    "plot_model(model_fit, to_file='model_plot.png',\n",
    "           show_shapes=True, show_layer_names=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "030953b3-be17-40d7-b0b6-7e73e8b92869",
   "metadata": {},
   "outputs": [],
   "source": [
    "## Check 2: Untrained RNN initialized with same weights\n",
    "\n",
    "hash2(model_fit.get_weights())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d18e09e0-7f47-4bc5-8cee-23752ac5bd5f",
   "metadata": {},
   "outputs": [],
   "source": [
    "Et = rnn_dat['Et']\n",
    "model_predict=create_RNN_2(hidden_units=6, dense_units=1,\n",
    "                            input_shape=(hours,features),stateful = False,\n",
    "                            return_sequences=True,\n",
    "                            activation=['linear','linear'],dense_layers=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "625a24ef-92ab-4657-87c6-aede6b7b89b2",
   "metadata": {},
   "outputs": [],
   "source": [
    "## Check 3: Second model initialization same weights\n",
    "\n",
    "hash2(model_predict.get_weights())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "797d5c31-9bff-4df9-83f5-cf688f3291d0",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(rnn_dat)\n",
    "x_train = rnn_dat['x_train']\n",
    "y_train = rnn_dat['y_train']\n",
    "type(x_train)\n",
    "\n",
    "# print dimensions and set initial state\n",
    "# print('model_fit input shape',x_train.shape,'output shape',model_fit(x_train).shape)\n",
    "\n",
    "# fitting\n",
    "DeltaE = 0\n",
    "# exact weights of the simple exponential smoothing model (see the note below)\n",
    "w_exact=  [np.array([[1.-np.exp(-0.1)]]), np.array([[np.exp(-0.1)]]), np.array([0.]),np.array([[1.0]]),np.array([-1.*DeltaE])]\n",
    "\n",
    "w_initial=[np.array([[1.-np.exp(-0.1)]]),\n",
    "           np.array([[np.exp(-0.1)]]),\n",
    "           np.array([0.]),\n",
    "           np.array([[1.0]]),\n",
    "           np.array([-1.0])]\n",
    "r = 1e-2\n",
    "print('randomization of initial weights ',r)\n",
    "# spread each scalar initial weight over the model's weight arrays,\n",
    "# dividing by the first dimension so entries sum back to the scalar,\n",
    "# and perturb by normal noise with standard deviation r\n",
    "w=model_fit.get_weights()\n",
    "for i in range(len(w)):\n",
    "    vprint('weight',i,'shape',w[i].shape,'ndim',w[i].ndim,'given',w_initial[i].shape)\n",
    "    for j in range(w[i].shape[0]):\n",
    "        if w[i].ndim==2:\n",
    "            for k in range(w[i].shape[1]):\n",
    "                w[i][j][k]=w_initial[i][0][0]/w[i].shape[0] + np.random.normal(0,1)*r\n",
    "        else:\n",
    "            w[i][j]=w_initial[i][0] + np.random.normal(0,1)*r\n",
    "model_fit.set_weights(w)"
   ]
  },
522    "cell_type": "code",
523    "execution_count": null,
524    "id": "580caac4-ba9a-48b1-86a3-8e97357bf666",
525    "metadata": {},
526    "outputs": [],
527    "source": [
528     "## Check 4: weights and inputs the same after this step \n",
529     "\n",
530     "print(hash2(model_fit.get_weights()))\n",
531     "print(hash2(x_train))\n",
532     "print(hash2(y_train))"
533    ]
534   },
535   {
536    "cell_type": "code",
537    "execution_count": null,
538    "id": "33d6b35c",
539    "metadata": {},
540    "outputs": [],
541    "source": [
542     "print('model_fit input shape',x_train.shape,'output shape',y_train.shape)\n",
543     "print('x_train',x_train)\n",
544     "print('y_train',y_train)"
545    ]
546   },
547   {
548    "cell_type": "code",
549    "execution_count": null,
550    "id": "7a2be797",
551    "metadata": {},
552    "outputs": [],
553    "source": [
554     "reproducibility.set_seed()"
555    ]
556   },
557   {
558    "cell_type": "code",
559    "execution_count": null,
560    "id": "c7857fb4",
561    "metadata": {},
562    "outputs": [],
563    "source": [
564     "model_fit.get_weights()"
565    ]
566   },
567   {
568    "cell_type": "code",
569    "execution_count": null,
570    "id": "9de62d29-2c91-48b5-a92f-da3deae470ca",
571    "metadata": {},
572    "outputs": [],
573    "source": [
574     "model_fit.fit(x_train, y_train, epochs=5000, verbose=2, batch_size=samples)\n",
575     "w_fitted=model_fit.get_weights()\n",
576     "for i in range(len(w)):\n",
577     "    vprint('weight',i,' exact:',w_exact[i],':  initial:',w_initial[i],' fitted:',w_fitted[i])\n",
578     "    \n",
579     "model_predict.set_weights(w_fitted)"
580    ]
581   },
582   {
583    "cell_type": "code",
584    "execution_count": null,
585    "id": "e108b89d-0747-41bd-91fc-5716375d9d2b",
586    "metadata": {},
587    "outputs": [],
588    "source": [
589     "## Check 5: Weights NOT the same after fitting\n",
590     "\n",
591     "hash2(model_fit.get_weights())"
592    ]
593   },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0fabcef2",
   "metadata": {},
   "outputs": [],
   "source": [
    "model_fit.get_weights()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2cece158-e7db-4d33-be09-03022e805b06",
   "metadata": {},
   "outputs": [],
   "source": [
    "model_fit.get_config()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a7c0a46d-4fe3-4d8c-8411-f95ce2b465ce",
   "metadata": {},
   "outputs": [],
   "source": [
    "## RNN weights repeated in an odd way that makes them look untrained\n",
    "\n",
    "model_fit.get_weights()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "438b3b9c-7e7a-48a0-ac22-0e3ab4460e57",
   "metadata": {},
   "outputs": [],
   "source": [
    "# evaluate model\n",
    "model_predict.set_weights(w_fitted)\n",
    "x_input=np.reshape(Et,(1, hours, 2))\n",
    "y_output = model_predict.predict(x_input)\n",
    "print('x_input.shape=',x_input.shape,'y_output.shape=',y_output.shape)\n",
    "# print(shift)\n",
    "m = np.reshape(y_output,hours)\n",
    "fitted_data = case_data.copy()\n",
    "fitted_data.update({'m':m,'title':\"First RNN forecast\"})"
   ]
  },
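  {
   "cell_type": "markdown",
   "id": "fitted-data-check-md",
   "metadata": {},
   "source": [
    "A suggested follow-up check (not in the original notebook): inspect the forecast stored in `fitted_data` with the same helpers used for `case_data` above, assuming they accept this dictionary in the same way."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fitted-data-check-code",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Suggested check of the forecast above, using helpers already imported in this notebook.\n",
    "%matplotlib inline\n",
    "plot_data(fitted_data,title2='RNN forecast from fitted weights')\n",
    "mse_data(fitted_data)"
   ]
  },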
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "94e9a5c6",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5