From ba47d07d7aa3893e5afa7b7b90343c7dd3544aeb Mon Sep 17 00:00:00 2001
From: jh-206
Date: Wed, 18 Sep 2024 11:43:05 -0600
Subject: [PATCH] Update data_funcs.py

---
 fmda/data_funcs.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/fmda/data_funcs.py b/fmda/data_funcs.py
index 3bca72b..234f297 100644
--- a/fmda/data_funcs.py
+++ b/fmda/data_funcs.py
@@ -16,16 +16,18 @@ import subprocess
 import os.path as osp
 from utils import Dict, str2time, check_increment, time_intp
 
-def process_train_dict(input_file_paths, data_params, atm_dict = "HRRR", verbose=False):
+def process_train_dict(input_file_paths, params_data, atm_dict = "HRRR", verbose=False):
+    if type(input_file_paths) is not list:
+        raise ValueError(f"Argument `input_file_paths` must be list, received {type(input_file_paths)}")
     train = {}
     for file_path in input_file_paths:
         # Extract target and features
-        di = build_train_dict(file_path, atm=atm_dict, features_all=data_params['features_all'], verbose=verbose)
+        di = build_train_dict(file_path, atm=atm_dict, features_all=params_data['features_all'], verbose=verbose)
         # Subset timeseries into shorter stretches
-        di = split_timeseries(di, hours=data_params['hours'], verbose=verbose)
-        di = discard_keys_with_short_y(di, hours=data_params['hours'], verbose=False)
+        di = split_timeseries(di, hours=params_data['hours'], verbose=verbose)
+        di = discard_keys_with_short_y(di, hours=params_data['hours'], verbose=False)
         # Check for suspect data
-        flags = flag_dict_keys(di, data_params['zero_lag_threshold'], data_params['max_intp_time'], max_y = data_params['max_fm'], min_y = data_params['min_fm'], verbose=verbose)
+        flags = flag_dict_keys(di, params_data['zero_lag_threshold'], params_data['max_intp_time'], max_y = params_data['max_fm'], min_y = params_data['min_fm'], verbose=verbose)
         # Remove flagged cases
         cases = list([*di.keys()])
         flagged_cases = [element for element, flag in zip(cases, flags) if flag == 1]
-- 
2.11.4.GIT