-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcommon_constants.py
More file actions
111 lines (101 loc) · 3.63 KB
/
common_constants.py
File metadata and controls
111 lines (101 loc) · 3.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import multiprocessing as mp
import optuna
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge
from sklearn.metrics import (
mean_absolute_error,
mean_absolute_percentage_error,
mean_squared_error,
)
from xgboost import XGBRegressor
from time_constants import (
DAYS_PER_MONTH,
DAYS_PER_WEEK,
FIFTEEN_MINUTES_PER_HOUR,
HOURS_PER_DAY,
)
# ---- Data loading ----
# Path of the CSV file containing the y (target) data.
Y_FILE = "data/load.csv"
# Path of the CSV file containing weather data; None means no path is set.
WEATHER_DATA_FILE = None
# Name of the target variable.
TARGET_VARIABLE = "Load"
# ---- Feature creation ----
# Whether to derive autoregressive features from y.
AR_FROM_Y = True
# Whether to derive autoregressive features from the weather data.
AR_FROM_WEATHER_DATA = False
# Number of lags to use for autoregressive features; 3 days.
LAGS = FIFTEEN_MINUTES_PER_HOUR * HOURS_PER_DAY * 3
# Maximum number of lags to use for autoregressive features; 3 days.
MAX_LAGS = FIFTEEN_MINUTES_PER_HOUR * HOURS_PER_DAY * 3
# ---- Creation of X and y ----
# Number of steps to forecast; 1 day.
FORECAST_HORIZON = FIFTEEN_MINUTES_PER_HOUR * HOURS_PER_DAY
# For CV
# All lengths below are products of the time_constants unit factors, so they
# are expressed in the same step unit as LAGS and FORECAST_HORIZON.
WINDOW_LENGTH = (
    FIFTEEN_MINUTES_PER_HOUR * HOURS_PER_DAY * DAYS_PER_MONTH * 3
)  # 3 months
# Fixed: this previously multiplied HOURS_PER_DAY by itself instead of by
# DAYS_PER_MONTH, which did not yield the documented 3 months.
INITIAL_WINDOW_LENGTH = (
    FIFTEEN_MINUTES_PER_HOUR * HOURS_PER_DAY * DAYS_PER_MONTH * 3
)  # 3 months
STEP_LENGTH = (
    FIFTEEN_MINUTES_PER_HOUR * HOURS_PER_DAY * DAYS_PER_MONTH
)  # 1 month
VAL_LENGTH = (
    FIFTEEN_MINUTES_PER_HOUR * HOURS_PER_DAY * DAYS_PER_MONTH
)  # 1 month
# Number of Optuna trials; N_TRIALS is an alias bound to the same value.
OPTUNA_TRIALS = N_TRIALS = 100
# Get the number of available CPU cores
num_cores = mp.cpu_count()
# Set the number of parallel jobs for HPO to 2/3 of the available cores, but
# never fewer than one: on a single-core host int(1 * 2 / 3) truncates to 0,
# and 0 is not a valid worker count (e.g. concurrent.futures.ThreadPoolExecutor
# raises ValueError for max_workers=0).
OPTUNA_JOBS = N_JOBS = max(1, int(num_cores * 2 / 3))
HPO_FLAG = False  # Flag to enable hyperparameter optimization
# Cross-validation strategy. Must be one of: "rolling", "expanding"
CV_STRATEGY = "rolling"
MODEL_NAME = "rr"  # Model name. Must be one of: "rr", "xgb", "rf"
# Metric name. OBJECTIVE_METRICS in this file defines "mae", "mse" and "mape".
# NOTE(review): this comment used to list "rmse", which has no entry in
# OBJECTIVE_METRICS -- confirm whether "rmse" is supported by the callers.
METRIC_NAME = "mae"
# Metric functions for the Optuna objective function, keyed by short name.
OBJECTIVE_METRICS = {
    "mae": mean_absolute_error,
    "mse": mean_squared_error,
    "mape": mean_absolute_percentage_error,
}
# Hyperparameter search spaces, keyed by model name and then by the name of
# the hyperparameter. Each value is an Optuna distribution object.
MODEL_SPACES = {
    "rr": {
        "alpha": optuna.distributions.FloatDistribution(
            low=0.01, high=1.0, log=True
        ),
    },
    "rf": {
        "n_estimators": optuna.distributions.IntDistribution(low=2, high=150),
        "max_depth": optuna.distributions.IntDistribution(low=1, high=32),
        "min_samples_split": optuna.distributions.IntDistribution(
            low=2, high=20
        ),
        "min_samples_leaf": optuna.distributions.IntDistribution(
            low=1, high=20
        ),
    },
    "xgb": {
        "n_estimators": optuna.distributions.IntDistribution(low=2, high=150),
        "max_depth": optuna.distributions.IntDistribution(low=1, high=10),
        "learning_rate": optuna.distributions.FloatDistribution(
            low=0.01, high=0.3, log=True
        ),
        "subsample": optuna.distributions.FloatDistribution(
            low=0.5, high=1.0
        ),
        "colsample_bytree": optuna.distributions.FloatDistribution(
            low=0.5, high=1.0
        ),
        "gamma": optuna.distributions.FloatDistribution(low=0, high=5),
    },
}
# Model name -> estimator class (the classes themselves, not instances).
MODEL_MAPPING = {
    "rr": Ridge,
    "rf": RandomForestRegressor,
    "xgb": XGBRegressor,
}
# Flag to enable logging of parameters and metrics to MLflow.
MLFLOW_LOGGING_FLAG = True
# Flag to enable calculation of SHAP values.
SHAP_VALUES_FLAG = False
# For simulating production
INITIAL_TRAIN_LENGTH = (
    FIFTEEN_MINUTES_PER_HOUR * HOURS_PER_DAY * DAYS_PER_MONTH * 6
)  # 6 months
TEST_LENGTH = (
    FIFTEEN_MINUTES_PER_HOUR * HOURS_PER_DAY * DAYS_PER_MONTH
)  # 1 month
# Fixed: this was the literal 24 * 7, i.e. a week counted in hourly steps,
# while every other length in this file is built from
# FIFTEEN_MINUTES_PER_HOUR * HOURS_PER_DAY. Expressing the week with the same
# unit constants keeps the step consistent with INITIAL_TRAIN_LENGTH and
# TEST_LENGTH.
# NOTE(review): assumes the data is on the same 15-minute grid as the other
# lengths -- confirm against the production-simulation caller.
TRAIN_TEST_STEP_LENGTH = (
    FIFTEEN_MINUTES_PER_HOUR * HOURS_PER_DAY * DAYS_PER_WEEK
)  # 1 week