Random_Forest(dft.1)_TAIEX_POC_0628

Random_Forest(dft.1)_TAIEX_POC_0628
Photo by Emily Morter / Unsplash

OLS Regression

import yfinance as yf
import pandas as pd
import numpy as np
import statsmodels.api as sm

# Constituent stock tickers plus the market index (^TWII), which is kept as
# the last entry of the list.
tickers = ["2330.TW", "2454.TW", "2317.TW", "2412.TW", "1303.TW", "2882.TW", "3008.TW", "2308.TW", "1402.TW",
           "1216.TW", "2881.TW", "2891.TW", "2382.TW", "2409.TW", "1802.TW", "1101.TW", "3045.TW", "2324.TW",
           "2105.TW", "2880.TW", "2887.TW", "2885.TW", "4904.TW", "2603.TW", "2884.TW", "2886.TW", "2357.TW",
           "2344.TW", "4938.TW", "2888.TW", "^TWII"]

# Download daily price data for every ticker.
# auto_adjust=False is passed explicitly: recent yfinance releases default to
# auto_adjust=True, which removes the 'Adj Close' column selected below.
data = yf.download(
    tickers,
    start="2021-01-01",
    end="2024-06-24",
    auto_adjust=False,
)

# Fail early with a clear message instead of a KeyError further down when the
# download returned nothing (network failure, rate limiting, bad symbols).
if data.empty:
    raise RuntimeError("yfinance returned no data for the requested tickers")

# Forward-fill missing values so every ticker has a price on every date after
# its first observation.
data = data.ffill()

# Adjusted close prices, one column per ticker.
adj_close = data['Adj Close']

# Daily percentage change. Gaps were already forward-filled above, so the
# deprecated implicit padding inside pct_change is switched off.
daily_change = adj_close.pct_change(fill_method=None)
# Rolling beta of a return series against the market.
def calculate_beta(stock_returns, market_returns, window):
    """Return the rolling beta of ``stock_returns`` versus ``market_returns``.

    Beta is the rolling covariance of the two return series divided by the
    rolling variance of the market returns, both computed over ``window``
    observations.

    Args:
        stock_returns: Returns of the asset, indexed like ``market_returns``.
        market_returns: Returns of the market benchmark.
        window: Number of observations in each rolling window.

    Returns:
        The rolling beta, aligned on the index of the inputs.
    """
    trailing_covariance = stock_returns.rolling(window).cov(market_returns)
    trailing_market_variance = market_returns.rolling(window).var()
    # Row-wise division keeps the result aligned on the date index.
    return trailing_covariance.div(trailing_market_variance, axis=0)

# Market (TAIEX) daily returns.
market_returns = daily_change["^TWII"]

# 120-day rolling beta of every column against the market.
beta_120 = daily_change.apply(calculate_beta, args=(market_returns, 120))

# Constituent tickers: everything except the index itself.
constituents = [ticker for ticker in tickers if ticker != "^TWII"]


def _rsi(prices, window=14):
    """Return the Relative Strength Index of ``prices`` over ``window`` days.

    Uses simple moving averages of gains and losses (not Wilder's
    exponential smoothing). The result lies in [0, 100]: 100 when the window
    contains no losses, 50 when the price did not move at all.
    """
    delta = prices.diff()
    avg_gain = delta.clip(lower=0).rolling(window=window).mean()
    avg_loss = (-delta.clip(upper=0)).rolling(window=window).mean()
    # avg_loss == 0 with gains gives rs == inf, which maps to RSI == 100.
    rsi = 100 - 100 / (1 + avg_gain / avg_loss)
    # A completely flat window is 0/0 (NaN); report it as neutral instead.
    return rsi.mask((avg_gain == 0) & (avg_loss == 0), 50.0)


# Collect every feature column in a dict and concatenate once. Inserting
# 200+ columns one at a time fragments the DataFrame and triggers pandas'
# PerformanceWarning.
feature_columns = {ticker: beta_120[ticker] for ticker in constituents}

# Technical indicators per constituent.
for ticker in constituents:
    close = adj_close[ticker]
    band_mid = close.rolling(window=20).mean()
    band_width = 2 * close.rolling(window=20).std()
    feature_columns[f'{ticker}_MA7'] = close.rolling(window=7).mean()
    feature_columns[f'{ticker}_MA21'] = close.rolling(window=21).mean()
    feature_columns[f'{ticker}_RSI14'] = _rsi(close, 14)
    feature_columns[f'{ticker}_MACD'] = (
        close.ewm(span=12, adjust=False).mean()
        - close.ewm(span=26, adjust=False).mean()
    )
    feature_columns[f'{ticker}_BB_upper'] = band_mid + band_width
    feature_columns[f'{ticker}_BB_lower'] = band_mid - band_width

# Previous day's TAIEX level.
feature_columns['Previous_TAIEX'] = adj_close['^TWII'].shift(1)

features = pd.concat(feature_columns, axis=1)

# Forward-fill interior gaps and treat infinities (e.g. a beta computed over
# a near-zero-variance window) as missing.
features = features.ffill().replace([np.inf, -np.inf], np.nan)

# Target variable: the TAIEX adjusted close.
target = adj_close["^TWII"]

# Drop the warm-up rows where rolling windows are not yet full instead of
# filling them with 0: a zero moving average or beta is fabricated data that
# would distort the regression.
complete_rows = features.notna().all(axis=1) & target.notna()
features = features.loc[complete_rows]
target = target.loc[complete_rows]

if features.empty:
    raise ValueError(
        "no complete observations left after dropping warm-up rows; "
        "download a longer price history"
    )

# NOTE(review): every feature except Previous_TAIEX is computed from prices
# of the same day as the target, so this is a contemporaneous fit rather than
# a forecast. Shift the features by one day if the model is meant to predict.

# Build the linear regression inputs.
X = features  # cleaned feature matrix
y = target    # adjusted close of the index

# Add the intercept term.
X = sm.add_constant(X)

# Fit the model.
model = sm.OLS(y, X).fit()

# Show the model summary.
print(model.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:                  ^TWII   R-squared:                       0.803
Model:                            OLS   Adj. R-squared:                  0.800
Method:                 Least Squares   F-statistic:                     401.5
Date:                Tue, 27 Jun 2024   Prob (F-statistic):               0.00
Time:                        10:54:56   Log-Likelihood:                -2436.4
No. Observations:                1000   AIC:                             4915.
Df Residuals:                     989   BIC:                             4968.
Df Model:                          10                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.2398      0.035     35.329      0.000       1.171       1.309
Previous_TAIEX 0.9996      0.003    355.192      0.000       0.994       1.005
2330.TW       -0.0013      0.002     -0.722      0.471      -0.005       0.002
2454.TW        0.0008      0.001      0.647      0.518      -0.002       0.004
2317.TW        0.0011      0.001      1.027      0.305      -0.001       0.003
2412.TW        0.0020      0.001      2.116      0.035       0.000       0.004
...
==============================================================================
Omnibus:                        0.952   Durbin-Watson:                   2.072
Prob(Omnibus):                  0.622   Jarque-Bera (JB):                0.867
Skew:                           0.079   Prob(JB):                        0.648
Kurtosis:                       2.940   Cond. No.                     1.01e+03
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.


Random Forest Code

import yfinance as yf
import pandas as pd
import numpy as np
import statsmodels.api as sm

# Constituent stock tickers plus the market index (^TWII), which is kept as
# the last entry of the list.
tickers = ["2330.TW", "2454.TW", "2317.TW", "2412.TW", "1303.TW", "2882.TW", "3008.TW", "2308.TW", "1402.TW",
           "1216.TW", "2881.TW", "2891.TW", "2382.TW", "2409.TW", "1802.TW", "1101.TW", "3045.TW", "2324.TW",
           "2105.TW", "2880.TW", "2887.TW", "2885.TW", "4904.TW", "2603.TW", "2884.TW", "2886.TW", "2357.TW",
           "2344.TW", "4938.TW", "2888.TW", "^TWII"]

# Download daily price data for every ticker.
# auto_adjust=False is passed explicitly: recent yfinance releases default to
# auto_adjust=True, which removes the 'Adj Close' column selected below.
data = yf.download(
    tickers,
    start="2021-01-01",
    end="2024-06-24",
    auto_adjust=False,
)

# Fail early with a clear message instead of a KeyError further down when the
# download returned nothing (network failure, rate limiting, bad symbols).
if data.empty:
    raise RuntimeError("yfinance returned no data for the requested tickers")

# Forward-fill missing values so every ticker has a price on every date after
# its first observation.
data = data.ffill()

# Adjusted close prices, one column per ticker.
adj_close = data['Adj Close']

# Daily percentage change. Gaps were already forward-filled above, so the
# deprecated implicit padding inside pct_change is switched off.
daily_change = adj_close.pct_change(fill_method=None)
# Rolling beta of a return series against the market.
def calculate_beta(stock_returns, market_returns, window):
    """Return the rolling beta of ``stock_returns`` versus ``market_returns``.

    Beta is the rolling covariance of the two return series divided by the
    rolling variance of the market returns, both computed over ``window``
    observations.

    Args:
        stock_returns: Returns of the asset, indexed like ``market_returns``.
        market_returns: Returns of the market benchmark.
        window: Number of observations in each rolling window.

    Returns:
        The rolling beta, aligned on the index of the inputs.
    """
    trailing_covariance = stock_returns.rolling(window).cov(market_returns)
    trailing_market_variance = market_returns.rolling(window).var()
    # Row-wise division keeps the result aligned on the date index.
    return trailing_covariance.div(trailing_market_variance, axis=0)

# Market (TAIEX) daily returns.
market_returns = daily_change["^TWII"]

# 120-day rolling beta of every column against the market.
beta_120 = daily_change.apply(calculate_beta, args=(market_returns, 120))

# Constituent tickers: everything except the index itself.
constituents = [ticker for ticker in tickers if ticker != "^TWII"]


def _rsi(prices, window=14):
    """Return the Relative Strength Index of ``prices`` over ``window`` days.

    Uses simple moving averages of gains and losses (not Wilder's
    exponential smoothing). The result lies in [0, 100]: 100 when the window
    contains no losses, 50 when the price did not move at all.
    """
    delta = prices.diff()
    avg_gain = delta.clip(lower=0).rolling(window=window).mean()
    avg_loss = (-delta.clip(upper=0)).rolling(window=window).mean()
    # avg_loss == 0 with gains gives rs == inf, which maps to RSI == 100.
    rsi = 100 - 100 / (1 + avg_gain / avg_loss)
    # A completely flat window is 0/0 (NaN); report it as neutral instead.
    return rsi.mask((avg_gain == 0) & (avg_loss == 0), 50.0)


# Collect every feature column in a dict and concatenate once. Inserting
# 200+ columns one at a time fragments the DataFrame and triggers pandas'
# PerformanceWarning.
feature_columns = {ticker: beta_120[ticker] for ticker in constituents}

# Technical indicators per constituent.
for ticker in constituents:
    close = adj_close[ticker]
    band_mid = close.rolling(window=20).mean()
    band_width = 2 * close.rolling(window=20).std()
    feature_columns[f'{ticker}_MA7'] = close.rolling(window=7).mean()
    feature_columns[f'{ticker}_MA21'] = close.rolling(window=21).mean()
    feature_columns[f'{ticker}_RSI14'] = _rsi(close, 14)
    feature_columns[f'{ticker}_MACD'] = (
        close.ewm(span=12, adjust=False).mean()
        - close.ewm(span=26, adjust=False).mean()
    )
    feature_columns[f'{ticker}_BB_upper'] = band_mid + band_width
    feature_columns[f'{ticker}_BB_lower'] = band_mid - band_width

# Previous day's TAIEX level.
feature_columns['Previous_TAIEX'] = adj_close['^TWII'].shift(1)

features = pd.concat(feature_columns, axis=1)

# Forward-fill interior gaps and treat infinities (e.g. a beta computed over
# a near-zero-variance window) as missing.
features = features.ffill().replace([np.inf, -np.inf], np.nan)

# Target variable: the TAIEX adjusted close.
target = adj_close["^TWII"]

# Drop the warm-up rows where rolling windows are not yet full instead of
# filling them with 0: a zero moving average or beta is fabricated data that
# would distort the regression.
complete_rows = features.notna().all(axis=1) & target.notna()
features = features.loc[complete_rows]
target = target.loc[complete_rows]

if features.empty:
    raise ValueError(
        "no complete observations left after dropping warm-up rows; "
        "download a longer price history"
    )

# NOTE(review): every feature except Previous_TAIEX is computed from prices
# of the same day as the target, so this is a contemporaneous fit rather than
# a forecast. Shift the features by one day if the model is meant to predict.

# NOTE(review): this listing appears under a Random Forest heading but still
# fits a statsmodels OLS regression; no random-forest estimator is used here.

# Build the linear regression inputs.
X = features  # cleaned feature matrix
y = target    # adjusted close of the index

# Add the intercept term.
X = sm.add_constant(X)

# Fit the model.
model = sm.OLS(y, X).fit()

# Show the model summary.
print(model.summary())