-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathStockPricePredictorV3.py
More file actions
96 lines (77 loc) · 3.79 KB
/
Copy pathStockPricePredictorV3.py
File metadata and controls
96 lines (77 loc) · 3.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import numpy as np
import pandas as pd
import xgboost as xgb
import optuna
import pandas_ta as ta
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
#Loading the dataset by taking the file path as input, and adding technical indicators: RSI, MACD, SMA of the last 5 and 10 days,
#Bollinger Bands, ATR and Rate of Change (ROC).
def DataPreparation(filePath):
    """Load a price CSV and build the feature table used for training.

    The CSV is read with its ``Date`` column parsed as dates and sorted by
    that column. The table is then extended with:

    * lagged copies of Open/High/Low/Close/Volume for the previous 1-5 rows
      (``Open1`` ... ``Volume5``),
    * 5- and 10-row simple moving averages of ``Close`` (``SMA5``, ``SMA10``),
    * pandas_ta indicators called with the library's default settings:
      RSI, Bollinger Bands, MACD, ATR and ROC.

    Args:
        filePath: Location of a CSV (anything ``pandas.read_csv`` accepts)
            containing ``Date``, ``Open``, ``High``, ``Low``, ``Close`` and
            ``Volume`` columns. An ``Adj Close`` column is dropped when
            present and is not required.

    Returns:
        A DataFrame without the ``Date``/``Adj Close`` columns in which every
        row containing a NaN has been removed.

    NOTE(review): the current row's Open/High/Low/Volume and the indicators
    derived from the current ``Close`` stay in the table next to ``Close``;
    confirm this is intended for the downstream model.
    """
    df = pd.read_csv(filePath, parse_dates=['Date'])
    df.sort_values(by='Date', inplace=True)

    # Not every export carries 'Adj Close'; errors='ignore' avoids a KeyError
    # when it is missing. 'Date' is guaranteed to exist by the lines above.
    df.drop(columns=['Date', 'Adj Close'], errors='ignore', inplace=True)

    # Lag features, created in the order Open, High, Low, Close, Volume for
    # each lag so the resulting column order is stable.
    for lag in range(1, 6):
        for column in ('Open', 'High', 'Low', 'Close', 'Volume'):
            df[f'{column}{lag}'] = df[column].shift(lag)

    df['SMA5'] = df['Close'].rolling(window=5).mean()
    df['SMA10'] = df['Close'].rolling(window=10).mean()
    df['RSI'] = ta.rsi(df['Close'])

    # bbands/macd return multi-column frames; append them column-wise.
    bbands = ta.bbands(df['Close'])
    df = pd.concat([df, bbands], axis=1)
    macd = ta.macd(df['Close'])
    df = pd.concat([df, macd], axis=1)

    df['ATR'] = ta.atr(df['High'], df['Low'], df['Close'])
    df['ROC'] = ta.roc(df['Close'])

    # Shifts, rolling windows and indicators leave NaNs in their first rows.
    df.dropna(inplace=True)
    return df
#Optimizing the model through Bayesian optimization, and fitting the training dataset.
def objective(trial, Xtrain, Xtest, Ytrain, Ytest):
    """Optuna objective: fit one XGBoost candidate and return its MSE.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        Xtrain: Features the candidate model is fitted on.
        Xtest: Features the candidate model is scored on.
        Ytrain: Targets matching ``Xtrain``.
        Ytest: Targets matching ``Xtest``.

    Returns:
        Mean squared error of the candidate's predictions for ``Xtest``
        against ``Ytest``.
    """
    # (name, low, high) triples; sampled in this order.
    integer_ranges = (
        ('n_estimators', 100, 500),
        ('max_depth', 3, 10),
    )
    float_ranges = (
        ('learning_rate', 0.01, 0.3),
        ('subsample', 0.5, 1.0),
        ('colsample_bytree', 0.5, 1.0),
        ('gamma', 0, 10),
        ('lambda', 1, 5),
        ('alpha', 0, 5),
    )

    hyperparams = {'objective': 'reg:squarederror'}
    for name, low, high in integer_ranges:
        hyperparams[name] = trial.suggest_int(name, low, high)
    for name, low, high in float_ranges:
        hyperparams[name] = trial.suggest_float(name, low, high)

    candidate = xgb.XGBRegressor(random_state=17, n_jobs=-1, **hyperparams)
    candidate.fit(Xtrain, Ytrain)
    return mean_squared_error(Ytest, candidate.predict(Xtest))
#Splitting the dataset into training (80%) and testing (20%) datasets, and training the model with the optimized XGBoost algorithm.
def trainModel(df):
    """Tune, fit and evaluate an XGBoost regressor that predicts ``Close``.

    Steps:
      1. Split the rows 80/20 into train and test sets (shuffled, seed 17).
      2. Split the training part 80/20 again into fit/validation sets and run
         a 50-trial Optuna search that minimises validation MSE. The test set
         is not seen during the search, so the metrics reported below are not
         tuned on the data they are measured on.
      3. Refit the best configuration on the whole training part, print MSE
         and R² on the test set, five predicted/actual pairs, and the model's
         output for the last row of the feature table.

    Args:
        df: DataFrame containing a ``Close`` column (the target); every other
            column is used as a feature.

    Returns:
        Tuple ``(bestModel, X.columns)``: the fitted ``XGBRegressor`` and the
        feature column index it was trained with.

    NOTE(review): rows are shuffled before splitting; if ``df`` is a time
    series with lagged columns, confirm a chronological split is not wanted.
    """
    X = df.drop(columns=['Close'])
    Y = df['Close']
    Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.2, random_state=17)

    # Hold out part of the training data for hyperparameter scoring so the
    # test set stays untouched until the final evaluation.
    Xfit, Xval, Yfit, Yval = train_test_split(Xtrain, Ytrain, test_size=0.2, random_state=17)

    # Seed the sampler like every other random component in this function.
    study = optuna.create_study(
        direction='minimize',
        sampler=optuna.samplers.TPESampler(seed=17),
    )
    study.optimize(lambda trial: objective(trial, Xfit, Xval, Yfit, Yval), n_trials=50)

    # Best hyperparameters found by the Optuna search.
    print("Best hyperparameters:", study.best_params)

    # Refit on the full training part (fit + validation) and predict the test set.
    bestModel = xgb.XGBRegressor(**study.best_params, random_state=17, n_jobs=-1)
    bestModel.fit(Xtrain, Ytrain)
    PredY = bestModel.predict(Xtest)

    # Evaluate with Mean Squared Error and R² on the held-out test set.
    mse = mean_squared_error(Ytest, PredY)
    r2 = r2_score(Ytest, PredY)
    print(f"Mean Squared Error: {mse:.10f}")
    print(f"R² Score: {r2:.10f}")

    # First five test rows (randomly chosen by the shuffled split above).
    for pred, actual in zip(PredY[:5], Ytest.iloc[:5]):
        print(f"Predicted: {pred}; Actual: {actual}")

    # Keep the row as a DataFrame so the model receives the same feature
    # names it was fitted with.
    latest_data = X.iloc[-1:]
    predicted_price = bestModel.predict(latest_data)[0]
    # NOTE(review): this is the model's output for the last row of X; nothing
    # in this function shifts the target forward, so confirm the
    # "five trading days" wording against how the features are built.
    print(f"Predicted price after five trading days: {predicted_price}")
    return bestModel, X.columns
#The driver code.
if __name__ == "__main__":
    # Pasted or drag-and-dropped paths often arrive with stray whitespace or
    # wrapped in quotes; trim both so the file can be opened.
    filePath = input("Enter the file path of the stock prices dataset (CSV): ").strip().strip('"\'')
    # Build the feature table, then tune/train/evaluate the model on it.
    df = DataPreparation(filePath)
    model, featureNames = trainModel(df)