![](https://thepearl.ghost.io/content/images/2024/06/data-src-image-13995d64-61ed-4179-953f-dc846410a927.png)
![](https://thepearl.ghost.io/content/images/2024/06/data-src-image-a57b9969-4c1a-493d-b5b6-e6ae5221ea22.png)
![](https://thepearl.ghost.io/content/images/2024/06/data-src-image-e9a1242a-7419-4735-8835-89814a2daa19.png)
OLS Regression
import yfinance as yf
import pandas as pd
import numpy as np
import statsmodels.api as sm
# Stock tickers to model, followed by the market index (^TWII, kept last so
# that tickers[:-1] selects only the stocks).
tickers = [
    "2330.TW", "2454.TW", "2317.TW", "2412.TW", "1303.TW", "2882.TW", "3008.TW", "2308.TW", "1402.TW",
    "1216.TW", "2881.TW", "2891.TW", "2382.TW", "2409.TW", "1802.TW", "1101.TW", "3045.TW", "2324.TW",
    "2105.TW", "2880.TW", "2887.TW", "2885.TW", "4904.TW", "2603.TW", "2884.TW", "2886.TW", "2357.TW",
    "2344.TW", "4938.TW", "2888.TW", "^TWII",
]
# Download the price history. auto_adjust=False is passed explicitly because
# newer yfinance releases default to auto_adjust=True, which drops the
# 'Adj Close' column that is read below.
data = yf.download(tickers, start="2021-01-01", end="2024-06-24", auto_adjust=False)
# Forward-fill missing values.
data = data.ffill()
# Extract the adjusted close prices.
adj_close = data['Adj Close']
# Daily percentage change of every column.
daily_change = adj_close.pct_change()
# Rolling beta calculation.
def calculate_beta(stock_returns, market_returns, window: int):
    """Return the rolling beta of ``stock_returns`` against ``market_returns``.

    For each rolling window, beta is cov(stock, market) / var(market).

    Args:
        stock_returns: Returns of the asset.
        market_returns: Returns of the benchmark.
        window: Window length handed to ``rolling``.

    Returns:
        The rolling beta. Values are NaN until a full window is available
        and wherever the market variance in the window is zero.
    """
    cov_matrix = stock_returns.rolling(window).cov(market_returns)
    var_market = market_returns.rolling(window).var()
    # Mask zero variance so the division yields NaN instead of +/-inf.
    var_market = var_market.where(var_market > 0)
    beta = cov_matrix.div(var_market, axis=0)
    return beta
# Market (^TWII) daily returns, used as the benchmark for beta.
market_returns = daily_change["^TWII"]
# 120-day rolling beta of every column against the market.
beta_120 = daily_change.apply(lambda x: calculate_beta(x, market_returns, 120))
# Start the feature table from the stock betas; the last ticker ("^TWII")
# is the index itself and is excluded.
features = pd.DataFrame({ticker: beta_120[ticker] for ticker in tickers[:-1]})
# Forward-fill missing values.
features = features.ffill()
# Add technical-indicator features for every stock (the index is skipped).
# The columns are collected in a dict and attached with a single concat
# rather than inserted into the frame one at a time.
indicator_columns = {}
for ticker in tickers[:-1]:
    price = adj_close[ticker]
    indicator_columns[f'{ticker}_MA7'] = price.rolling(window=7).mean()
    indicator_columns[f'{ticker}_MA21'] = price.rolling(window=21).mean()
    # RSI(14) = 100 - 100 / (1 + RS), where RS is the average gain divided
    # by the average loss over the window (simple-moving-average variant).
    delta = price.diff()
    avg_gain = delta.clip(lower=0).rolling(window=14).mean()
    avg_loss = (-delta.clip(upper=0)).rolling(window=14).mean()
    relative_strength = avg_gain / avg_loss
    indicator_columns[f'{ticker}_RSI14'] = 100 - (100 / (1 + relative_strength))
    indicator_columns[f'{ticker}_MACD'] = (
        price.ewm(span=12, adjust=False).mean()
        - price.ewm(span=26, adjust=False).mean()
    )
    # Bollinger bands: 20-day mean +/- two 20-day standard deviations.
    bb_mid = price.rolling(window=20).mean()
    bb_width = 2 * price.rolling(window=20).std()
    indicator_columns[f'{ticker}_BB_upper'] = bb_mid + bb_width
    indicator_columns[f'{ticker}_BB_lower'] = bb_mid - bb_width
# Previous day's TAIEX value.
indicator_columns['Previous_TAIEX'] = adj_close['^TWII'].shift(1)
features = pd.concat([features, pd.DataFrame(indicator_columns)], axis=1)
# Forward-fill missing values.
features = features.ffill()
# Make sure no NaN values remain.
# NOTE(review): this zero-fills the rolling warm-up rows (e.g. a moving
# average of 0 for the first days); consider dropping those rows together
# with the matching target rows instead.
features = features.fillna(0)
# Regression target: the adjusted close of the index.
target = adj_close["^TWII"]
# Response vector is the target as-is.
y = target
# Design matrix: the prepared features plus a constant (intercept) column.
X = sm.add_constant(features)
# Fit the ordinary-least-squares model.
model = sm.OLS(endog=y, exog=X).fit()
# Show the model summary.
print(model.summary())
OLS Regression Results
==============================================================================
Dep. Variable: ^TWII R-squared: 0.803
Model: OLS Adj. R-squared: 0.800
Method: Least Squares F-statistic: 401.5
Date: Tue, 27 Jun 2024 Prob (F-statistic): 0.00
Time: 10:54:56 Log-Likelihood: -2436.4
No. Observations: 1000 AIC: 4915.
Df Residuals: 989 BIC: 4968.
Df Model: 10
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
const 1.2398 0.035 35.329 0.000 1.171 1.309
Previous_TAIEX 0.9996 0.003 355.192 0.000 0.994 1.005
2330.TW -0.0013 0.002 -0.722 0.471 -0.005 0.002
2454.TW 0.0008 0.001 0.647 0.518 -0.002 0.004
2317.TW 0.0011 0.001 1.027 0.305 -0.001 0.003
2412.TW 0.0020 0.001 2.116 0.035 0.000 0.004
...
==============================================================================
Omnibus: 0.952 Durbin-Watson: 2.072
Prob(Omnibus): 0.622 Jarque-Bera (JB): 0.867
Skew: 0.079 Prob(JB): 0.648
Kurtosis: 2.940 Cond. No. 1.01e+03
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
RandomForest Code
import yfinance as yf
import pandas as pd
import numpy as np
import statsmodels.api as sm
# Stock tickers to model, followed by the market index (^TWII, kept last so
# that tickers[:-1] selects only the stocks).
tickers = [
    "2330.TW", "2454.TW", "2317.TW", "2412.TW", "1303.TW", "2882.TW", "3008.TW", "2308.TW", "1402.TW",
    "1216.TW", "2881.TW", "2891.TW", "2382.TW", "2409.TW", "1802.TW", "1101.TW", "3045.TW", "2324.TW",
    "2105.TW", "2880.TW", "2887.TW", "2885.TW", "4904.TW", "2603.TW", "2884.TW", "2886.TW", "2357.TW",
    "2344.TW", "4938.TW", "2888.TW", "^TWII",
]
# Download the price history. auto_adjust=False is passed explicitly because
# newer yfinance releases default to auto_adjust=True, which drops the
# 'Adj Close' column that is read below.
data = yf.download(tickers, start="2021-01-01", end="2024-06-24", auto_adjust=False)
# Forward-fill missing values.
data = data.ffill()
# Extract the adjusted close prices.
adj_close = data['Adj Close']
# Daily percentage change of every column.
daily_change = adj_close.pct_change()
# Rolling beta calculation.
def calculate_beta(stock_returns, market_returns, window: int):
    """Return the rolling beta of ``stock_returns`` against ``market_returns``.

    For each rolling window, beta is cov(stock, market) / var(market).

    Args:
        stock_returns: Returns of the asset.
        market_returns: Returns of the benchmark.
        window: Window length handed to ``rolling``.

    Returns:
        The rolling beta. Values are NaN until a full window is available
        and wherever the market variance in the window is zero.
    """
    cov_matrix = stock_returns.rolling(window).cov(market_returns)
    var_market = market_returns.rolling(window).var()
    # Mask zero variance so the division yields NaN instead of +/-inf.
    var_market = var_market.where(var_market > 0)
    beta = cov_matrix.div(var_market, axis=0)
    return beta
# Market (^TWII) daily returns, used as the benchmark for beta.
market_returns = daily_change["^TWII"]
# 120-day rolling beta of every column against the market.
beta_120 = daily_change.apply(lambda x: calculate_beta(x, market_returns, 120))
# Start the feature table from the stock betas; the last ticker ("^TWII")
# is the index itself and is excluded.
features = pd.DataFrame({ticker: beta_120[ticker] for ticker in tickers[:-1]})
# Forward-fill missing values.
features = features.ffill()
# Add technical-indicator features for every stock (the index is skipped).
# The columns are collected in a dict and attached with a single concat
# rather than inserted into the frame one at a time.
indicator_columns = {}
for ticker in tickers[:-1]:
    price = adj_close[ticker]
    indicator_columns[f'{ticker}_MA7'] = price.rolling(window=7).mean()
    indicator_columns[f'{ticker}_MA21'] = price.rolling(window=21).mean()
    # RSI(14) = 100 - 100 / (1 + RS), where RS is the average gain divided
    # by the average loss over the window (simple-moving-average variant).
    delta = price.diff()
    avg_gain = delta.clip(lower=0).rolling(window=14).mean()
    avg_loss = (-delta.clip(upper=0)).rolling(window=14).mean()
    relative_strength = avg_gain / avg_loss
    indicator_columns[f'{ticker}_RSI14'] = 100 - (100 / (1 + relative_strength))
    indicator_columns[f'{ticker}_MACD'] = (
        price.ewm(span=12, adjust=False).mean()
        - price.ewm(span=26, adjust=False).mean()
    )
    # Bollinger bands: 20-day mean +/- two 20-day standard deviations.
    bb_mid = price.rolling(window=20).mean()
    bb_width = 2 * price.rolling(window=20).std()
    indicator_columns[f'{ticker}_BB_upper'] = bb_mid + bb_width
    indicator_columns[f'{ticker}_BB_lower'] = bb_mid - bb_width
# Previous day's TAIEX value.
indicator_columns['Previous_TAIEX'] = adj_close['^TWII'].shift(1)
features = pd.concat([features, pd.DataFrame(indicator_columns)], axis=1)
# Forward-fill missing values.
features = features.ffill()
# Make sure no NaN values remain.
# NOTE(review): this zero-fills the rolling warm-up rows (e.g. a moving
# average of 0 for the first days); consider dropping those rows together
# with the matching target rows instead.
features = features.fillna(0)
# Regression target: the adjusted close of the index.
target = adj_close["^TWII"]
# Response vector is the target as-is.
y = target
# Design matrix: the prepared features plus a constant (intercept) column.
X = sm.add_constant(features)
# Fit the ordinary-least-squares model.
model = sm.OLS(endog=y, exog=X).fit()
# Show the model summary.
print(model.summary())