From b5c248b037ee777a2e0615989ce0e026a097fd8b Mon Sep 17 00:00:00 2001
From: Jan <jan.mandel@gmail.com>
Date: Sat, 2 Jul 2022 15:31:48 -0600
Subject: [PATCH] isolated create_synthetic_data

---
 fmda_kf_rnn.ipynb | 149 +++++++++++++++++++++++++++++++-----------------------
 1 file changed, 87 insertions(+), 62 deletions(-)

diff --git a/fmda_kf_rnn.ipynb b/fmda_kf_rnn.ipynb
index 832e85c..679665b 100644
--- a/fmda_kf_rnn.ipynb
+++ b/fmda_kf_rnn.ipynb
@@ -629,42 +629,64 @@
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
+        "id": "my6nnrk1iQo8"
+      },
+      "outputs": [],
+      "source": [
+        ""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
         "id": "-_pz-wXnCMnP"
       },
       "outputs": [],
       "source": [
-        "import numpy as np, random\n",
-        "days = 20       \n",
-        "hours = days*24\n",
-        "h2 = int(hours/2)\n",
-        "hour = np.array(range(hours))\n",
-        "day = np.array(range(hours))/24.\n",
-        "\n",
-        "# artificial equilibrium data\n",
-        "E = np.power(np.sin(np.pi*day),4) # diurnal curve\n",
-        "E = 0.05+0.25*E\n",
-        "# FMC free run\n",
-        "m_f = np.zeros(hours)\n",
-        "m_f[0] = 0.1         # initial FMC\n",
-        "process_noise = 0.005\n",
-        "process_noise=0.\n",
-        "for t in range(hours-1):\n",
-        "  m_f[t+1] = max(0.,model_decay(m_f[t],E[t])  + random.gauss(0,process_noise) )\n",
-        "data = m_f + np.random.normal(loc=0,scale=0.01,size=hours)    \n",
+        "def create_synthetic_data(days=20,power=4,data_noise=0.02,process_noise=0.0,DeltaE=0.0):\n",
+        "  import numpy as np, random\n",
+        "  hours = days*24\n",
+        "  h2 = int(hours/2)\n",
+        "  hour = np.array(range(hours))\n",
+        "  day = np.array(range(hours))/24.\n",
+        "\n",
+        "  # artificial equilibrium data\n",
+        "  E = np.power(np.sin(np.pi*day),4) # diurnal curve\n",
+        "  E = 0.05+0.25*E\n",
+        "  # FMC free run\n",
+        "  m_f = np.zeros(hours)\n",
+        "  m_f[0] = 0.1         # initial FMC\n",
+        "  process_noise=0.\n",
+        "  for t in range(hours-1):\n",
+        "    m_f[t+1] = max(0.,model_decay(m_f[t],E[t])  + random.gauss(0,process_noise) )\n",
+        "  data = m_f + np.random.normal(loc=0,scale=data_noise,size=hours)\n",
+        "  E = E + DeltaE    \n",
         "\n",
-        "%matplotlib inline\n",
-        "import matplotlib.pyplot as plt \n",
-        "# fig1, ax1 = plt.subplots()\n",
+        "  %matplotlib inline\n",
+        "  import matplotlib.pyplot as plt \n",
+        "  # fig1, ax1 = plt.subplots()\n",
         "\n",
-        "plt.figure(figsize=(16,4))\n",
-        "plt.plot(hour,E,linestyle='--',c='r',label='Equilibrium')\n",
-        "plt.plot(hour,m_f,linestyle='-',c='k',label='10-h fuel truth')\n",
-        "plt.scatter(hour[:h2],data[:h2],c='b',label='10-h fuel data')\n",
-        "plt.title('Kalman filter example on artificial data')\n",
-        "plt.xlabel('Time (hours)')\n",
-        "plt.ylabel('Fuel moisture content (%)')\n",
-        "plt.legend()\n",
-        " "
+        "  plt.figure(figsize=(16,4))\n",
+        "  plt.plot(hour,E,linestyle='--',c='r',label='Equilibrium')\n",
+        "  plt.plot(hour,m_f,linestyle='-',c='k',label='10-h fuel truth')\n",
+        "  plt.scatter(hour[:h2],data[:h2],c='b',label='10-h fuel data')\n",
+        "  plt.title('Synthetic data')\n",
+        "  plt.xlabel('Time (hours)')\n",
+        "  plt.ylabel('Fuel moisture content (%)')\n",
+        "  plt.legend()\n",
+        "  return E,m_f,data,hour,h2\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "GfRxLp4HkcVz"
+      },
+      "outputs": [],
+      "source": [
+        "E,m_f,data,hour,h2 = create_synthetic_data(days=20,power=4,data_noise=0.000,process_noise=0.0,DeltaE=0.0) "
       ]
     },
     {
@@ -698,7 +720,8 @@
         "\n",
         "# using global E, m_f\n",
         "\n",
-        "def plot_m(m,Ec=None,title=None,):\n",
+        "def plot_m(m,Ec=None,title=None,):  # global hour\n",
+        "  hours=hour.shape[0]\n",
         "  %matplotlib inline\n",
         "  plt.figure(figsize=(16,4))\n",
         "  plt.plot(hour,E,linestyle='--',c='r',label='E=Equilibrium data')\n",
@@ -719,7 +742,7 @@
         "  plt.legend()\n",
         "\n",
         "def kf_example(DeltaE):\n",
-        "  h2 = int(hours/2)\n",
+        "  hours=hour.shape[0]\n",
         "  m = np.zeros(hours)\n",
         "  m[0]=0.1             # background state  \n",
         "  P = np.zeros(hours)\n",
@@ -1011,6 +1034,7 @@
       "outputs": [],
       "source": [
         "def augmented_example(DeltaE):\n",
+        "  hours=hour.shape[0]\n",
         "  h2 = int(hours/2)\n",
         "  m, Ec = run_augmented_kf(data,E+DeltaE,E+DeltaE,0*E,h2,hours)  # data, E, hours are global\n",
         "  return m, Ec"
@@ -1116,6 +1140,11 @@
     },
     {
       "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "H3RTQCDG9q-4"
+      },
+      "outputs": [],
       "source": [
         "import numpy as np\n",
         "import tensorflow as tf\n",
@@ -1128,12 +1157,7 @@
         "import matplotlib.pyplot as plt\n",
         "import tensorflow as tf\n",
         "import keras.backend as K"
-      ],
-      "metadata": {
-        "id": "H3RTQCDG9q-4"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
     },
     {
       "cell_type": "code",
@@ -1301,6 +1325,11 @@
     },
     {
       "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "FzOotSFf-tPR"
+      },
+      "outputs": [],
       "source": [
         "def seq2batches(x,y,timesteps,trainsteps):\n",
         "  # x [trainsteps+forecaststeps,features]    all inputs\n",
@@ -1326,15 +1355,15 @@
         "      for j in range(outputs):\n",
         "        y_train[i,k,j] = y[i+k,j]  # return sequences\n",
         "  return x_train, y_train"
-      ],
-      "metadata": {
-        "id": "FzOotSFf-tPR"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "Kg7wSrkk-HrE"
+      },
+      "outputs": [],
       "source": [
         "print('test preprocessing for RNN')\n",
         "trainsteps=5\n",
@@ -1351,12 +1380,7 @@
         "x_train, y_train = seq2batches(x,y,timesteps,trainsteps)\n",
         "print('x_train=',x_train)\n",
         "print('y_train=',y_train)"
-      ],
-      "metadata": {
-        "id": "Kg7wSrkk-HrE"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
     },
     {
       "cell_type": "code",
@@ -1366,6 +1390,7 @@
       },
       "outputs": [],
       "source": [
+        "E,m_f,data,hour,h2 = create_synthetic_data(days=20,power=4,data_noise=0.000,process_noise=0.0,DeltaE=0.0) \n",
         "scalerx = MinMaxScaler()\n",
         "Et = np.reshape(E,[E.shape[0],1])\n",
         "scalerx.fit(Et)\n",
@@ -1382,6 +1407,11 @@
     },
     {
       "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "PaHfJW7mSJE1"
+      },
+      "outputs": [],
       "source": [
         "def create_RNN_2(hidden_units, dense_units, activation, stateful=False, batch_shape=None, input_shape=None):\n",
         "    if stateful:\n",
@@ -1410,15 +1440,15 @@
         "                        activation=activation)\n",
         "    print(model_predict.summary())\n",
         "    return model_fit, model_predict"
-      ],
-      "metadata": {
-        "id": "PaHfJW7mSJE1"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "R2jkoZlAIaSb"
+      },
+      "outputs": [],
       "source": [
         "fmda_model, fmda_model_eval = create_fit_predict_RNN(hidden_units=7, dense_units=1, \n",
         "      samples=samples, timesteps=timesteps, features=1, \n",
@@ -1431,12 +1461,7 @@
         "mt = fmda_model_eval.predict(Et)\n",
         "m = scalery.inverse_transform(mt)\n",
         "plot_m(m,title='RNN prediction')"
-      ],
-      "metadata": {
-        "id": "R2jkoZlAIaSb"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
     },
     {
       "cell_type": "code",
-- 
2.11.4.GIT