From fc037d17d2459976c92d1352837070d092d94254 Mon Sep 17 00:00:00 2001 From: Jan Mandel Date: Wed, 17 Apr 2024 10:33:53 -0600 Subject: [PATCH] fmda_rnn_rain.ipynb check 5 ok --- fmda/fmda_rnn_rain.ipynb | 4 +- fmda/fmda_rnn_rain_output.ipynb | 282 +++++++++++++++++++++------------------- fmda/moisture_rnn.py | 38 +++--- fmda/moisture_rnn_pkl.py | 9 +- fmda/test-plk2train.ipynb | 76 ++++++++++- 5 files changed, 247 insertions(+), 162 deletions(-) diff --git a/fmda/fmda_rnn_rain.ipynb b/fmda/fmda_rnn_rain.ipynb index 17f6d8f..37ad4d8 100644 --- a/fmda/fmda_rnn_rain.ipynb +++ b/fmda/fmda_rnn_rain.ipynb @@ -205,7 +205,9 @@ "id": "d64157c2-24e6-4e42-9c8d-0be45ce0c529", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "logging.info('fmda_rnn_rain.ipynb done')" + ] }, { "cell_type": "code", diff --git a/fmda/fmda_rnn_rain_output.ipynb b/fmda/fmda_rnn_rain_output.ipynb index be29c16..738bd61 100644 --- a/fmda/fmda_rnn_rain_output.ipynb +++ b/fmda/fmda_rnn_rain_output.ipynb @@ -159,8 +159,8 @@ "After fixing, remained 0 nan values\n", "WARNING: case case13 variable Ew shape (1200,) has 1 nan values, fixing\n", "After fixing, remained 0 nan values\n", - "2024-04-17 02:29:39,265 - INFO - testing datasets test_dict.keys():dict_keys(['case1', 'case2', 'case3', 'case4', 'case5', 'case6', 'case7', 'case8', 'case9', 'case10', 'case11', 'case12', 'case13'])\n", - "2024-04-17 02:29:39,268 - INFO - reproducibity dataset repro_dict.keys(): dict_keys(['case11'])\n" + "2024-04-17 09:33:45,908 - INFO - testing datasets test_dict.keys():dict_keys(['case1', 'case2', 'case3', 'case4', 'case5', 'case6', 'case7', 'case8', 'case9', 'case10', 'case11', 'case12', 'case13'])\n", + "2024-04-17 09:33:45,910 - INFO - reproducibity dataset repro_dict.keys(): dict_keys(['case11'])\n" ] } ], @@ -183,7 +183,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "2024-04-17 02:29:39,283 - INFO - params_sets_keys=['0']\n", + "2024-04-17 09:33:45,924 - INFO - params_sets_keys=['0']\n", "i= 0\n", "cases= ['case11']\n", "case= case11 RNN Orig\n", @@ -193,16 +193,15 @@ "{'id': 0, 'purpose': 'reproducibility', 'batch_size': inf, 'training': None, 'cases': ['case11'], 'scale': 0, 'rain_do': False, 'verbose': 1, 'timesteps': 5, 'activation': ['linear', 'linear'], 'centering': [0.0, 0.0], 'hidden_units': 6, 'dense_units': 1, 'dense_layers': 1, 'DeltaE': [0, -1], 'synthetic': False, 'T1': 0.1, 'fm_raise_vs_rain': 2.0, 'epochs': 5000, 'verbose_fit': 0, 'verbose_weights': False, 'note': 'check 5 should give zero error', 'initialize': True}\n", "case11 Training 1 to 300 hours RMSE: 0.3305\n", "case11 Prediction 301 to 854 hours RMSE: 1.0984\n", - "2024-04-17 02:29:39,383 - INFO - run_rnn start\n", + "2024-04-17 09:33:46,027 - INFO - run_rnn start\n", "resetting random seeds to 123\n", - "2024-04-17 02:29:39,384 - INFO - create_rnn_data_1 start\n", - "2024-04-17 02:29:39,385 - INFO - create_rnn_data_1: hours=None h2=None\n", - "2024-04-17 02:29:39,386 - INFO - feature matrix X shape (854, 2)\n", - "2024-04-17 02:29:39,386 - INFO - target matrix Y shape (854, 1)\n", - "2024-04-17 02:29:39,387 - INFO - features_list: ['Ed', 'Ew']\n", - "2024-04-17 02:29:39,388 - INFO - create_rnn_data_2 start\n", - "2024-04-17 02:29:39,388 - INFO - create_rnn_data_2: hours=None h2=None\n", - "2024-04-17 02:29:39,389 - INFO - batch_size=inf\n", + "2024-04-17 09:33:46,028 - INFO - create_rnn_data_1 start\n", + "2024-04-17 09:33:46,029 - INFO - create_rnn_data_1: hours=None h2=None\n", + "2024-04-17 09:33:46,030 - INFO - feature matrix X shape (854, 2)\n", + "2024-04-17 09:33:46,030 - INFO - target matrix Y shape (854, 1)\n", + "2024-04-17 09:33:46,031 - INFO - features_list: ['Ed', 'Ew']\n", + "2024-04-17 09:33:46,032 - INFO - create_rnn_data_2 start\n", + "2024-04-17 09:33:46,033 - INFO - batch_size=inf\n", "staircase: shape x = (854, 2)\n", "staircase: shape y = (854, 1)\n", "staircase: timesteps= 5\n", @@ -210,40 +209,41 @@ "staircase: return_sequences= False\n", "staircase: samples= 296 timesteps= 5 features= 2\n", "returning only the last timestep in a sample\n", - "2024-04-17 02:29:39,396 - INFO - x_train shape=(296, 5, 2)\n", - "2024-04-17 02:29:39,397 - INFO - y_train shape=(296, 1)\n", - "2024-04-17 02:29:39,416 - INFO - create_rnn_data_2 done\n", + "2024-04-17 09:33:46,039 - INFO - x_train shape=(296, 5, 2)\n", + "2024-04-17 09:33:46,040 - INFO - y_train shape=(296, 1)\n", + "2024-04-17 09:33:46,058 - INFO - create_rnn_data_2 done\n", "rnn_dat\n", - "items: ['rain_do', 'features_list', 'scale', 'scale_fm', 'scale_rain', 'case', 'hours', 'x_train', 'y_train', 'X', 'samples', 'timesteps', 'features', 'h0', 'h2']\n", + "items: ['case', 'hours', 'h2', 'rain_do', 'features_list', 'scale', 'scale_fm', 'scale_rain', 'X', 'Y', 'x_train', 'y_train', 'samples', 'timesteps', 'features']\n", + "case = case11 \n", + "hours = 854 \n", + "h2 = 300 \n", "rain_do = False \n", "features_list = ['Ed', 'Ew'] \n", "scale = 0 \n", "scale_fm = 1.0 \n", "scale_rain = 1.0 \n", - "case = case11 \n", - "hours = 854 \n", + "\n", + "array X shape (854, 2) min 1.0385669966012008 max 17.076346687085564 hash 8667321837928673541 type \n", + "\n", + "array Y shape (854, 1) min 1.6 max 8.1 hash 9476624128859380674 type \n", "\n", "array x_train shape (296, 5, 2) min 1.6974029070178052 max 16.49195767995983 hash 14769812307992047695 type \n", "\n", "array y_train shape (296, 1) min 3.0 max 7.9 hash 10119650379183749962 type \n", - "\n", - "array X shape (854, 2) min 1.0385669966012008 max 17.076346687085564 hash 8667321837928673541 type \n", "samples = 296 \n", "timesteps = 5 \n", "features = 2 \n", - "array h0 shape (296, 2) min 1.697403 max 16.491959 type \n", - "h2 = 300 \n", - "2024-04-17 02:29:39,419 - INFO - train_rnn start, hours=854 fit=False\n", - "2024-04-17 02:29:39,420 - INFO - case = case11\n", - "2024-04-17 02:29:39,420 - INFO - samples = 296\n", - "2024-04-17 02:29:39,421 - INFO - features = 2\n", - "2024-04-17 02:29:39,422 - INFO - timesteps = 5\n", - "2024-04-17 02:29:39,422 - INFO - centering = [0.0, 0.0]\n", - "2024-04-17 02:29:39,423 - INFO - training = None\n", - "2024-04-17 02:29:39,424 - INFO - batch_size = inf\n", - "2024-04-17 02:29:39,424 - INFO - initialize = True\n", - "2024-04-17 02:29:39,425 - INFO - replacing batch_size by 296\n", - "2024-04-17 02:29:39,426 - INFO - epochs = 5000\n", + "2024-04-17 09:33:46,061 - INFO - train_rnn start, hours=854 fit=False\n", + "2024-04-17 09:33:46,062 - INFO - case = case11\n", + "2024-04-17 09:33:46,063 - INFO - samples = 296\n", + "2024-04-17 09:33:46,064 - INFO - features = 2\n", + "2024-04-17 09:33:46,064 - INFO - timesteps = 5\n", + "2024-04-17 09:33:46,065 - INFO - centering = [0.0, 0.0]\n", + "2024-04-17 09:33:46,066 - INFO - training = None\n", + "2024-04-17 09:33:46,067 - INFO - batch_size = inf\n", + "2024-04-17 09:33:46,067 - INFO - initialize = True\n", + "2024-04-17 09:33:46,068 - INFO - replacing batch_size by 296\n", + "2024-04-17 09:33:46,069 - INFO - epochs = 5000\n", "Function: moisture_rnn.create_RNN_2\n", "Arguments:\n", " hidden_units = 6\n", @@ -293,7 +293,7 @@ "initializing weights\n", "initial weights hash = 3.735307973219784e+19\n", "Fitting skipped, using initial weights\n", - "1/1 [==============================] - 0s 280ms/step\n", + "1/1 [==============================] - 0s 282ms/step\n", "x_input.shape= (1, 854, 2) y_output.shape= (1, 854, 1)\n", "check - hash weights: 3.735307973219784e+19\n" ] @@ -332,19 +332,18 @@ "name": "stdout", "output_type": "stream", "text": [ - "2024-04-17 02:29:40,775 - INFO - run_rnn end\n", + "2024-04-17 09:33:47,401 - INFO - run_rnn end\n", "case11 Training 1 to 300 hours RMSE: 1.327\n", "case11 Prediction 301 to 854 hours RMSE: 1.5834\n", - "2024-04-17 02:29:40,776 - INFO - run_rnn start\n", + "2024-04-17 09:33:47,402 - INFO - run_rnn start\n", "resetting random seeds to 123\n", - "2024-04-17 02:29:40,781 - INFO - create_rnn_data_1 start\n", - "2024-04-17 02:29:40,781 - INFO - create_rnn_data_1: hours=None h2=None\n", - "2024-04-17 02:29:40,782 - INFO - feature matrix X shape (854, 2)\n", - "2024-04-17 02:29:40,783 - INFO - target matrix Y shape (854, 1)\n", - "2024-04-17 02:29:40,783 - INFO - features_list: ['Ed', 'Ew']\n", - "2024-04-17 02:29:40,784 - INFO - create_rnn_data_2 start\n", - "2024-04-17 02:29:40,785 - INFO - create_rnn_data_2: hours=None h2=None\n", - "2024-04-17 02:29:40,785 - INFO - batch_size=inf\n", + "2024-04-17 09:33:47,406 - INFO - create_rnn_data_1 start\n", + "2024-04-17 09:33:47,407 - INFO - create_rnn_data_1: hours=None h2=None\n", + "2024-04-17 09:33:47,407 - INFO - feature matrix X shape (854, 2)\n", + "2024-04-17 09:33:47,408 - INFO - target matrix Y shape (854, 1)\n", + "2024-04-17 09:33:47,409 - INFO - features_list: ['Ed', 'Ew']\n", + "2024-04-17 09:33:47,410 - INFO - create_rnn_data_2 start\n", + "2024-04-17 09:33:47,410 - INFO - batch_size=inf\n", "staircase: shape x = (854, 2)\n", "staircase: shape y = (854, 1)\n", "staircase: timesteps= 5\n", @@ -352,40 +351,41 @@ "staircase: return_sequences= False\n", "staircase: samples= 296 timesteps= 5 features= 2\n", "returning only the last timestep in a sample\n", - "2024-04-17 02:29:40,793 - INFO - x_train shape=(296, 5, 2)\n", - "2024-04-17 02:29:40,794 - INFO - y_train shape=(296, 1)\n", - "2024-04-17 02:29:40,796 - INFO - create_rnn_data_2 done\n", + "2024-04-17 09:33:47,419 - INFO - x_train shape=(296, 5, 2)\n", + "2024-04-17 09:33:47,420 - INFO - y_train shape=(296, 1)\n", + "2024-04-17 09:33:47,421 - INFO - create_rnn_data_2 done\n", "rnn_dat\n", - "items: ['rain_do', 'features_list', 'scale', 'scale_fm', 'scale_rain', 'case', 'hours', 'x_train', 'y_train', 'X', 'samples', 'timesteps', 'features', 'h0', 'h2']\n", + "items: ['case', 'hours', 'h2', 'rain_do', 'features_list', 'scale', 'scale_fm', 'scale_rain', 'X', 'Y', 'x_train', 'y_train', 'samples', 'timesteps', 'features']\n", + "case = case11 \n", + "hours = 854 \n", + "h2 = 300 \n", "rain_do = False \n", "features_list = ['Ed', 'Ew'] \n", "scale = 0 \n", "scale_fm = 1.0 \n", "scale_rain = 1.0 \n", - "case = case11 \n", - "hours = 854 \n", + "\n", + "array X shape (854, 2) min 1.0385669966012008 max 17.076346687085564 hash 8667321837928673541 type \n", + "\n", + "array Y shape (854, 1) min 1.6 max 8.1 hash 9476624128859380674 type \n", "\n", "array x_train shape (296, 5, 2) min 1.6974029070178052 max 16.49195767995983 hash 14769812307992047695 type \n", "\n", "array y_train shape (296, 1) min 3.0 max 7.9 hash 10119650379183749962 type \n", - "\n", - "array X shape (854, 2) min 1.0385669966012008 max 17.076346687085564 hash 8667321837928673541 type \n", "samples = 296 \n", "timesteps = 5 \n", "features = 2 \n", - "array h0 shape (296, 2) min 1.697403 max 16.491959 type \n", - "h2 = 300 \n", - "2024-04-17 02:29:40,798 - INFO - train_rnn start, hours=854 fit=True\n", - "2024-04-17 02:29:40,799 - INFO - case = case11\n", - "2024-04-17 02:29:40,799 - INFO - samples = 296\n", - "2024-04-17 02:29:40,800 - INFO - features = 2\n", - "2024-04-17 02:29:40,801 - INFO - timesteps = 5\n", - "2024-04-17 02:29:40,802 - INFO - centering = [0.0, 0.0]\n", - "2024-04-17 02:29:40,803 - INFO - training = None\n", - "2024-04-17 02:29:40,804 - INFO - batch_size = inf\n", - "2024-04-17 02:29:40,804 - INFO - initialize = True\n", - "2024-04-17 02:29:40,805 - INFO - replacing batch_size by 296\n", - "2024-04-17 02:29:40,806 - INFO - epochs = 5000\n", + "2024-04-17 09:33:47,423 - INFO - train_rnn start, hours=854 fit=True\n", + "2024-04-17 09:33:47,424 - INFO - case = case11\n", + "2024-04-17 09:33:47,425 - INFO - samples = 296\n", + "2024-04-17 09:33:47,425 - INFO - features = 2\n", + "2024-04-17 09:33:47,426 - INFO - timesteps = 5\n", + "2024-04-17 09:33:47,427 - INFO - centering = [0.0, 0.0]\n", + "2024-04-17 09:33:47,427 - INFO - training = None\n", + "2024-04-17 09:33:47,428 - INFO - batch_size = inf\n", + "2024-04-17 09:33:47,429 - INFO - initialize = True\n", + "2024-04-17 09:33:47,429 - INFO - replacing batch_size by 296\n", + "2024-04-17 09:33:47,430 - INFO - epochs = 5000\n", "Function: moisture_rnn.create_RNN_2\n", "Arguments:\n", " hidden_units = 6\n", @@ -434,7 +434,7 @@ "y_train hash = 10119650379183749962\n", "initializing weights\n", "initial weights hash = 3.735307973219784e+19\n", - "2024-04-17 02:29:41,087 - INFO - verbose_fit = 0\n" + "2024-04-17 09:33:47,708 - INFO - verbose_fit = 0\n" ] }, { @@ -452,7 +452,7 @@ "output_type": "stream", "text": [ "fitted weights hash = 5.55077327554663e+19\n", - "1/1 [==============================] - 0s 251ms/step\n", + "1/1 [==============================] - 0s 241ms/step\n", "x_input.shape= (1, 854, 2) y_output.shape= (1, 854, 1)\n", "check 5: 5.55077327554663e+19 should be 5.55077327554663e+19 error 0.0\n", "checkm= 3.77920889854431152 error 0.0\n", @@ -473,7 +473,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "2024-04-17 02:29:57,292 - INFO - run_rnn end\n", + "2024-04-17 09:34:02,691 - INFO - run_rnn end\n", "case11 Training 1 to 300 hours RMSE: 0.7998\n", "case11 Prediction 301 to 854 hours RMSE: 0.733\n", "*** params 0 case case11 summary ***\n", @@ -507,16 +507,15 @@ "{'id': 0, 'purpose': 'reproducibility', 'batch_size': inf, 'training': None, 'cases': ['case11'], 'scale': 0, 'rain_do': False, 'verbose': 1, 'timesteps': 5, 'activation': ['linear', 'linear'], 'centering': [0.0, 0.0], 'hidden_units': 6, 'dense_units': 1, 'dense_layers': 1, 'DeltaE': [0, -1], 'synthetic': False, 'T1': 0.1, 'fm_raise_vs_rain': 2.0, 'epochs': 5000, 'verbose_fit': 0, 'verbose_weights': False, 'note': 'check 5 should give zero error', 'initialize': False}\n", "case11 Training 1 to 300 hours RMSE: 0.3305\n", "case11 Prediction 301 to 854 hours RMSE: 1.0984\n", - "2024-04-17 02:29:57,384 - INFO - run_rnn start\n", + "2024-04-17 09:34:02,779 - INFO - run_rnn start\n", "resetting random seeds to 123\n", - "2024-04-17 02:29:57,391 - INFO - create_rnn_data_1 start\n", - "2024-04-17 02:29:57,392 - INFO - create_rnn_data_1: hours=None h2=None\n", - "2024-04-17 02:29:57,393 - INFO - feature matrix X shape (854, 2)\n", - "2024-04-17 02:29:57,393 - INFO - target matrix Y shape (854, 1)\n", - "2024-04-17 02:29:57,394 - INFO - features_list: ['Ed', 'Ew']\n", - "2024-04-17 02:29:57,395 - INFO - create_rnn_data_2 start\n", - "2024-04-17 02:29:57,395 - INFO - create_rnn_data_2: hours=None h2=None\n", - "2024-04-17 02:29:57,396 - INFO - batch_size=inf\n", + "2024-04-17 09:34:02,786 - INFO - create_rnn_data_1 start\n", + "2024-04-17 09:34:02,786 - INFO - create_rnn_data_1: hours=None h2=None\n", + "2024-04-17 09:34:02,787 - INFO - feature matrix X shape (854, 2)\n", + "2024-04-17 09:34:02,788 - INFO - target matrix Y shape (854, 1)\n", + "2024-04-17 09:34:02,788 - INFO - features_list: ['Ed', 'Ew']\n", + "2024-04-17 09:34:02,789 - INFO - create_rnn_data_2 start\n", + "2024-04-17 09:34:02,790 - INFO - batch_size=inf\n", "staircase: shape x = (854, 2)\n", "staircase: shape y = (854, 1)\n", "staircase: timesteps= 5\n", @@ -524,40 +523,41 @@ "staircase: return_sequences= False\n", "staircase: samples= 296 timesteps= 5 features= 2\n", "returning only the last timestep in a sample\n", - "2024-04-17 02:29:57,404 - INFO - x_train shape=(296, 5, 2)\n", - "2024-04-17 02:29:57,405 - INFO - y_train shape=(296, 1)\n", - "2024-04-17 02:29:57,407 - INFO - create_rnn_data_2 done\n", + "2024-04-17 09:34:02,796 - INFO - x_train shape=(296, 5, 2)\n", + "2024-04-17 09:34:02,797 - INFO - y_train shape=(296, 1)\n", + "2024-04-17 09:34:02,798 - INFO - create_rnn_data_2 done\n", "rnn_dat\n", - "items: ['rain_do', 'features_list', 'scale', 'scale_fm', 'scale_rain', 'case', 'hours', 'x_train', 'y_train', 'X', 'samples', 'timesteps', 'features', 'h0', 'h2']\n", + "items: ['case', 'hours', 'h2', 'rain_do', 'features_list', 'scale', 'scale_fm', 'scale_rain', 'X', 'Y', 'x_train', 'y_train', 'samples', 'timesteps', 'features']\n", + "case = case11 \n", + "hours = 854 \n", + "h2 = 300 \n", "rain_do = False \n", "features_list = ['Ed', 'Ew'] \n", "scale = 0 \n", "scale_fm = 1.0 \n", "scale_rain = 1.0 \n", - "case = case11 \n", - "hours = 854 \n", + "\n", + "array X shape (854, 2) min 1.0385669966012008 max 17.076346687085564 hash 8667321837928673541 type \n", + "\n", + "array Y shape (854, 1) min 1.6 max 8.1 hash 9476624128859380674 type \n", "\n", "array x_train shape (296, 5, 2) min 1.6974029070178052 max 16.49195767995983 hash 14769812307992047695 type \n", "\n", "array y_train shape (296, 1) min 3.0 max 7.9 hash 10119650379183749962 type \n", - "\n", - "array X shape (854, 2) min 1.0385669966012008 max 17.076346687085564 hash 8667321837928673541 type \n", "samples = 296 \n", "timesteps = 5 \n", "features = 2 \n", - "array h0 shape (296, 2) min 1.697403 max 16.491959 type \n", - "h2 = 300 \n", - "2024-04-17 02:29:57,409 - INFO - train_rnn start, hours=854 fit=False\n", - "2024-04-17 02:29:57,410 - INFO - case = case11\n", - "2024-04-17 02:29:57,410 - INFO - samples = 296\n", - "2024-04-17 02:29:57,411 - INFO - features = 2\n", - "2024-04-17 02:29:57,412 - INFO - timesteps = 5\n", - "2024-04-17 02:29:57,412 - INFO - centering = [0.0, 0.0]\n", - "2024-04-17 02:29:57,413 - INFO - training = None\n", - "2024-04-17 02:29:57,414 - INFO - batch_size = inf\n", - "2024-04-17 02:29:57,414 - INFO - initialize = False\n", - "2024-04-17 02:29:57,415 - INFO - replacing batch_size by 296\n", - "2024-04-17 02:29:57,415 - INFO - epochs = 5000\n", + "2024-04-17 09:34:02,800 - INFO - train_rnn start, hours=854 fit=False\n", + "2024-04-17 09:34:02,801 - INFO - case = case11\n", + "2024-04-17 09:34:02,802 - INFO - samples = 296\n", + "2024-04-17 09:34:02,802 - INFO - features = 2\n", + "2024-04-17 09:34:02,803 - INFO - timesteps = 5\n", + "2024-04-17 09:34:02,804 - INFO - centering = [0.0, 0.0]\n", + "2024-04-17 09:34:02,804 - INFO - training = None\n", + "2024-04-17 09:34:02,805 - INFO - batch_size = inf\n", + "2024-04-17 09:34:02,806 - INFO - initialize = False\n", + "2024-04-17 09:34:02,806 - INFO - replacing batch_size by 296\n", + "2024-04-17 09:34:02,807 - INFO - epochs = 5000\n", "Function: moisture_rnn.create_RNN_2\n", "Arguments:\n", " hidden_units = 6\n", @@ -606,7 +606,7 @@ "y_train hash = 10119650379183749962\n", "NOT initializing weights\n", "Fitting skipped, using initial weights\n", - "1/1 [==============================] - 0s 272ms/step\n", + "1/1 [==============================] - 0s 240ms/step\n", "x_input.shape= (1, 854, 2) y_output.shape= (1, 854, 1)\n", "check - hash weights: 2.7903911808504898e+19\n" ] @@ -645,19 +645,18 @@ "name": "stdout", "output_type": "stream", "text": [ - "2024-04-17 02:29:58,680 - INFO - run_rnn end\n", + "2024-04-17 09:34:03,987 - INFO - run_rnn end\n", "case11 Training 1 to 300 hours RMSE: 14.5875\n", "case11 Prediction 301 to 854 hours RMSE: 12.7463\n", - "2024-04-17 02:29:58,681 - INFO - run_rnn start\n", + "2024-04-17 09:34:03,988 - INFO - run_rnn start\n", "resetting random seeds to 123\n", - "2024-04-17 02:29:58,685 - INFO - create_rnn_data_1 start\n", - "2024-04-17 02:29:58,686 - INFO - create_rnn_data_1: hours=None h2=None\n", - "2024-04-17 02:29:58,687 - INFO - feature matrix X shape (854, 2)\n", - "2024-04-17 02:29:58,688 - INFO - target matrix Y shape (854, 1)\n", - "2024-04-17 02:29:58,688 - INFO - features_list: ['Ed', 'Ew']\n", - "2024-04-17 02:29:58,689 - INFO - create_rnn_data_2 start\n", - "2024-04-17 02:29:58,690 - INFO - create_rnn_data_2: hours=None h2=None\n", - "2024-04-17 02:29:58,691 - INFO - batch_size=inf\n", + "2024-04-17 09:34:03,992 - INFO - create_rnn_data_1 start\n", + "2024-04-17 09:34:03,993 - INFO - create_rnn_data_1: hours=None h2=None\n", + "2024-04-17 09:34:03,994 - INFO - feature matrix X shape (854, 2)\n", + "2024-04-17 09:34:03,994 - INFO - target matrix Y shape (854, 1)\n", + "2024-04-17 09:34:03,995 - INFO - features_list: ['Ed', 'Ew']\n", + "2024-04-17 09:34:03,996 - INFO - create_rnn_data_2 start\n", + "2024-04-17 09:34:03,996 - INFO - batch_size=inf\n", "staircase: shape x = (854, 2)\n", "staircase: shape y = (854, 1)\n", "staircase: timesteps= 5\n", @@ -665,40 +664,41 @@ "staircase: return_sequences= False\n", "staircase: samples= 296 timesteps= 5 features= 2\n", "returning only the last timestep in a sample\n", - "2024-04-17 02:29:58,699 - INFO - x_train shape=(296, 5, 2)\n", - "2024-04-17 02:29:58,699 - INFO - y_train shape=(296, 1)\n", - "2024-04-17 02:29:58,701 - INFO - create_rnn_data_2 done\n", + "2024-04-17 09:34:04,003 - INFO - x_train shape=(296, 5, 2)\n", + "2024-04-17 09:34:04,004 - INFO - y_train shape=(296, 1)\n", + "2024-04-17 09:34:04,005 - INFO - create_rnn_data_2 done\n", "rnn_dat\n", - "items: ['rain_do', 'features_list', 'scale', 'scale_fm', 'scale_rain', 'case', 'hours', 'x_train', 'y_train', 'X', 'samples', 'timesteps', 'features', 'h0', 'h2']\n", + "items: ['case', 'hours', 'h2', 'rain_do', 'features_list', 'scale', 'scale_fm', 'scale_rain', 'X', 'Y', 'x_train', 'y_train', 'samples', 'timesteps', 'features']\n", + "case = case11 \n", + "hours = 854 \n", + "h2 = 300 \n", "rain_do = False \n", "features_list = ['Ed', 'Ew'] \n", "scale = 0 \n", "scale_fm = 1.0 \n", "scale_rain = 1.0 \n", - "case = case11 \n", - "hours = 854 \n", + "\n", + "array X shape (854, 2) min 1.0385669966012008 max 17.076346687085564 hash 8667321837928673541 type \n", + "\n", + "array Y shape (854, 1) min 1.6 max 8.1 hash 9476624128859380674 type \n", "\n", "array x_train shape (296, 5, 2) min 1.6974029070178052 max 16.49195767995983 hash 14769812307992047695 type \n", "\n", "array y_train shape (296, 1) min 3.0 max 7.9 hash 10119650379183749962 type \n", - "\n", - "array X shape (854, 2) min 1.0385669966012008 max 17.076346687085564 hash 8667321837928673541 type \n", "samples = 296 \n", "timesteps = 5 \n", "features = 2 \n", - "array h0 shape (296, 2) min 1.697403 max 16.491959 type \n", - "h2 = 300 \n", - "2024-04-17 02:29:58,703 - INFO - train_rnn start, hours=854 fit=True\n", - "2024-04-17 02:29:58,704 - INFO - case = case11\n", - "2024-04-17 02:29:58,705 - INFO - samples = 296\n", - "2024-04-17 02:29:58,706 - INFO - features = 2\n", - "2024-04-17 02:29:58,706 - INFO - timesteps = 5\n", - "2024-04-17 02:29:58,707 - INFO - centering = [0.0, 0.0]\n", - "2024-04-17 02:29:58,708 - INFO - training = None\n", - "2024-04-17 02:29:58,709 - INFO - batch_size = inf\n", - "2024-04-17 02:29:58,709 - INFO - initialize = False\n", - "2024-04-17 02:29:58,710 - INFO - replacing batch_size by 296\n", - "2024-04-17 02:29:58,711 - INFO - epochs = 5000\n", + "2024-04-17 09:34:04,007 - INFO - train_rnn start, hours=854 fit=True\n", + "2024-04-17 09:34:04,008 - INFO - case = case11\n", + "2024-04-17 09:34:04,009 - INFO - samples = 296\n", + "2024-04-17 09:34:04,009 - INFO - features = 2\n", + "2024-04-17 09:34:04,010 - INFO - timesteps = 5\n", + "2024-04-17 09:34:04,011 - INFO - centering = [0.0, 0.0]\n", + "2024-04-17 09:34:04,011 - INFO - training = None\n", + "2024-04-17 09:34:04,012 - INFO - batch_size = inf\n", + "2024-04-17 09:34:04,013 - INFO - initialize = False\n", + "2024-04-17 09:34:04,013 - INFO - replacing batch_size by 296\n", + "2024-04-17 09:34:04,014 - INFO - epochs = 5000\n", "Function: moisture_rnn.create_RNN_2\n", "Arguments:\n", " hidden_units = 6\n", @@ -746,7 +746,7 @@ "x_train hash = 14769812307992047695\n", "y_train hash = 10119650379183749962\n", "NOT initializing weights\n", - "2024-04-17 02:29:58,987 - INFO - verbose_fit = 0\n" + "2024-04-17 09:34:04,281 - INFO - verbose_fit = 0\n" ] }, { @@ -764,7 +764,7 @@ "output_type": "stream", "text": [ "fitted weights hash = 3.5246083873473495e+19\n", - "1/1 [==============================] - 0s 240ms/step\n", + "1/1 [==============================] - 0s 250ms/step\n", "x_input.shape= (1, 854, 2) y_output.shape= (1, 854, 1)\n", "check 5: 3.5246083873473495e+19 should be 3.5246083873473495e+19 error 0.0\n", "checkm= 3.77248024940490723 error 0.0\n", @@ -785,7 +785,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "2024-04-17 02:30:14,541 - INFO - run_rnn end\n", + "2024-04-17 09:34:20,005 - INFO - run_rnn end\n", "case11 Training 1 to 300 hours RMSE: 0.749\n", "case11 Prediction 301 to 854 hours RMSE: 0.7233\n", "*** params 0 case case11 summary ***\n", @@ -934,11 +934,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "d64157c2-24e6-4e42-9c8d-0be45ce0c529", "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-04-17 09:34:20,014 - INFO - fmda_rnn_rain.ipynb done\n" + ] + } + ], + "source": [ + "logging.info('fmda_rnn_rain.ipynb done')" + ] }, { "cell_type": "code", diff --git a/fmda/moisture_rnn.py b/fmda/moisture_rnn.py index afb9e69..a33d1cc 100644 --- a/fmda/moisture_rnn.py +++ b/fmda/moisture_rnn.py @@ -215,17 +215,27 @@ def create_rnn_data_1(dat, params, hours=None, h2=None): logging.info('target matrix Y shape %s',np.shape(Y)) logging.info('features_list: %s',features_list) + if hours is None: + hours = dat['hours'] + if h2 is None: + h2 = dat['h2'] + rnn_dat={ + 'case':dat['case'], + 'hours':hours, + 'h2':h2, 'rain_do':rain_do, 'features_list':features_list, 'scale':scale, 'scale_fm':scale_fm, 'scale_rain':scale_rain, + 'X':X, + 'Y':Y } - return X, Y, rnn_dat + return rnn_dat -def create_rnn_data_2(X, Y, rnn_dat, dat, params, hours=None, h2=None): +def create_rnn_data_2(rnn_dat, params): logging.info('create_rnn_data_2 start') @@ -233,14 +243,10 @@ def create_rnn_data_2(X, Y, rnn_dat, dat, params, hours=None, h2=None): scale = params['scale'] verbose = params['verbose'] batch_size = params['batch_size'] - - logging.info('create_rnn_data_2: hours=%s h2=%s',hours,h2) - - if hours is None: - hours = dat['hours'] - if h2 is None: - h2 = dat['h2'] - + X = rnn_dat['X'] + Y = rnn_dat['Y'] + h2= rnn_dat['h2'] + logging.info('batch_size=%s',batch_size) if batch_size is None or batch_size is np.inf: x_train, y_train = staircase(X,Y,timesteps=timesteps,datapoints=h2, @@ -260,22 +266,16 @@ def create_rnn_data_2(X, Y, rnn_dat, dat, params, hours=None, h2=None): # Set up return dictionary rnn_dat.update({ - 'case':dat['case'], - 'hours': hours, 'x_train': x_train, 'y_train': y_train, - 'X': X, 'samples': samples, 'timesteps': timesteps, 'features':features, - 'h0': h0, - 'hours':hours, - 'h2':h2 }) logging.info('create_rnn_data_2 done') - return rnn_dat + # return rnn_dat from tensorflow.keras.callbacks import Callback @@ -530,8 +530,8 @@ def run_rnn(case_data,params,fit=True,title2=''): verbose = params['verbose'] reproducibility.set_seed() # Set seed for reproducibility - X, Y, rnn_dat = create_rnn_data_1(case_data,params) - rnn_dat = create_rnn_data_2(X,Y,rnn_dat,case_data,params) + rnn_dat = create_rnn_data_1(case_data,params) + create_rnn_data_2(rnn_dat,params) if params['verbose']: check_data(rnn_dat,case=0,name='rnn_dat') model_predict = train_rnn( diff --git a/fmda/moisture_rnn_pkl.py b/fmda/moisture_rnn_pkl.py index 47366c5..de7e221 100644 --- a/fmda/moisture_rnn_pkl.py +++ b/fmda/moisture_rnn_pkl.py @@ -5,6 +5,8 @@ import pickle import os.path as osp import pandas as pd import numpy as np +import reproducibility +from moisture_rnn import create_rnn_data_2, train_rnn, rnn_predict def pkl2train(input_file_paths,output_file_path='train.pkl',forecast_step=1): # in: @@ -57,6 +59,7 @@ def pkl2train(input_file_paths,output_file_path='train.pkl',forecast_step=1): raise(ValueError) # build matrix of features - assuming all the same length, if not column_stack will fail train[key]['time']=time_hrrr + columns=[] # location as features constant in time come first columns.append(np.full(timesteps,loc['elev'])) @@ -69,6 +72,7 @@ def pkl2train(input_file_paths,output_file_path='train.pkl',forecast_step=1): logging.info('%s rain as difference %s minus %s: min %s max %s',key,fstep,fprev,np.min(rain),np.max(rain)) columns.append( rain ) # add rain feature train[key]['X'] = np.column_stack(columns) + logging.info(f"Created feature matrix train[{key}]['X'] shape {train[key]['X'].shape}") time_raws=str2time(subdict['RAWS']['time_raws']) # may not be the same as HRRR logging.info('%s RAWS.time_raws length is %s',key,len(time_raws)) @@ -78,6 +82,7 @@ def pkl2train(input_file_paths,output_file_path='train.pkl',forecast_step=1): logging.info('%s RAWS.fm length is %s',key,len(fm)) # interpolate RAWS sensors to HRRR time and over NaNs train[key]['Y'] = time_intp(time_raws,fm,time_hrrr) + if train[key]['Y'] is None: logging.error('Cannot create target matrix for %s, using None',key) else: @@ -111,7 +116,7 @@ def pkl2train(input_file_paths,output_file_path='train.pkl',forecast_step=1): return train -def run_rnn_pkl(case_data,params,fit=True,title2=''): +def run_rnn_pkl(rnn_dat,params,fit=True,title2=''): # Run RNN on given a case subdictionary of the output of pkl2train # Inputs: # case_data: (dict) @@ -124,7 +129,7 @@ def run_rnn_pkl(case_data,params,fit=True,title2=''): reproducibility.set_seed() # Set seed for reproducibility - rnn_dat = create_rnn_data_2(X,Y,rnn_dat,case_data,params) + create_rnn_data_2(rnn_dat,params) model_predict = train_rnn( rnn_dat, diff --git a/fmda/test-plk2train.ipynb b/fmda/test-plk2train.ipynb index 571e715..83b3e9c 100644 --- a/fmda/test-plk2train.ipynb +++ b/fmda/test-plk2train.ipynb @@ -9,8 +9,10 @@ "source": [ "from utils import print_dict_summary, print_first, str2time, logging_setup\n", "import pickle\n", + "import logging\n", "import os.path as osp\n", - "from moisture_rnn_pkl import pkl2train" + "from moisture_rnn_pkl import pkl2train, run_rnn_pkl\n", + "from moisture_rnn import create_rnn_data_2 " ] }, { @@ -52,7 +54,18 @@ "metadata": {}, "outputs": [], "source": [ - "train = pkl2train(file_paths)" + "# train = pkl2train(file_paths)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "57c9dbea-d033-4eb1-b38e-2bbba6980b92", + "metadata": {}, + "outputs": [], + "source": [ + "with open('train.pkl','rb') as file:\n", + " train=pickle.load(file)" ] }, { @@ -62,7 +75,48 @@ "metadata": {}, "outputs": [], "source": [ - "print_dict_summary(train)" + "# print_dict_summary(train)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "698df86b-8550-4135-81df-45dbf503dd4e", + "metadata": {}, + "outputs": [], + "source": [ + "from module_param_sets import param_sets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4b0c9a9b-dd02-4251-aa4a-2acc1101e153", + "metadata": {}, + "outputs": [], + "source": [ + "param_sets_keys=['1']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6b7ce5c3-4b6b-4fd4-9eb1-858318415270", + "metadata": {}, + "outputs": [], + "source": [ + "cases=[list(train.keys())[0]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dd22baf2-59d2-460e-8c47-b20116dd5982", + "metadata": {}, + "outputs": [], + "source": [ + "logging.info('Running over parameter sets %s',param_sets_keys)\n", + "logging.info('Running over cases %s',cases)" ] }, { @@ -71,7 +125,21 @@ "id": "dc5b47bd-4fbc-44b8-b2dd-d118e068b450", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "for i in param_sets_keys:\n", + " for case in cases:\n", + " run_rnn_pkl(train[case],param_sets[i])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15384e4d-b8ec-4700-bdc2-83b0433d11c9", + "metadata": {}, + "outputs": [], + "source": [ + "logging.info('test-plk2train.ipynb done')" + ] } ], "metadata": { -- 2.11.4.GIT