# TAIEX.s41_Training.1st: first training run, RandomForest + SVR

# TAIEX.s41_Training.1st: first training run, RandomForest + SVR

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.compose import TransformedTargetRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

# Load the raw price data from the CSV file
file_path = '/content/drive/My Drive/MSCI_Taiwan_30_data_with_OBV.csv'
data = pd.read_csv(file_path)

# Convert the date column to datetime and expose Close_MSCI under the
# shorter name 'Close' that the indicator helpers read
data = data.assign(Date=pd.to_datetime(data['Date'])).rename(
    columns={'Close_MSCI': 'Close'}
)

# Simple moving averages (SMA) of the close over 10 and 50 rows
data = data.assign(
    MA10=data['Close'].rolling(window=10).mean(),
    MA50=data['Close'].rolling(window=50).mean(),
)

# Compute the Relative Strength Index (RSI)
def compute_rsi(data, window=14):
    """Return the rolling-mean RSI of ``data['Close']``.

    Args:
        data: Object whose ``'Close'`` item is a numeric pandas Series
            (typically a DataFrame).
        window: Number of consecutive price changes averaged per RSI value.

    Returns:
        A Series aligned with ``data['Close']``. The first ``window``
        entries are NaN because fewer than ``window`` price changes exist
        there. A value is 100 when the window holds no losses, and NaN when
        the window holds neither gains nor losses (0 / 0).
    """
    delta = data['Close'].diff(1)
    # clip() keeps the NaN that diff() produces for the first row, whereas
    # masking with where(..., 0) would turn it into a fabricated "no change"
    # day and let the first RSI value be averaged over it.
    gain = delta.clip(lower=0).rolling(window=window).mean()
    loss = (-delta).clip(lower=0).rolling(window=window).mean()
    rs = gain / loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

# Attach the RSI series to the frame as the 'RSI' column
data = data.assign(RSI=compute_rsi(data))

# Compute the Moving Average Convergence Divergence (MACD)
def compute_macd(data, fast=12, slow=26, signal=9):
    """Return the MACD line, its signal line and their difference.

    Args:
        data: Object whose ``'Close'`` item is a numeric pandas Series.
        fast: Span of the faster exponential moving average.
        slow: Span of the slower exponential moving average.
        signal: Span of the exponential moving average applied to the
            MACD line.

    Returns:
        A ``(macd, signal_line, histogram)`` tuple of Series, where
        ``macd`` is fast EMA minus slow EMA and ``histogram`` is
        ``macd - signal_line``.
    """
    def _ema(series, span):
        # Recursive (adjust=False) exponential moving average
        return series.ewm(span=span, adjust=False).mean()

    close = data['Close']
    macd_line = _ema(close, fast) - _ema(close, slow)
    trigger = _ema(macd_line, signal)
    return macd_line, trigger, macd_line - trigger

# Attach the three MACD series to the frame, in the order compute_macd
# returns them: MACD line, signal line, histogram
data = data.assign(
    **dict(zip(('MACD', 'MACD_Signal', 'MACD_Hist'), compute_macd(data)))
)

# Chronological 80/20 split point. It is computed first so that every
# statistic below (imputation means, correlations, scaler parameters) is
# derived from the training rows only and nothing from the test period
# leaks into the features.
train_size = int(len(data) * 0.8)

# Fill missing values in the numeric columns with the training-period mean
# of each column
numeric_columns = data.select_dtypes(include=[np.number]).columns
train_means = data[numeric_columns].iloc[:train_size].mean()
data[numeric_columns] = data[numeric_columns].fillna(train_means)

# Numeric columns are the candidates for the correlation analysis
numeric_data = data.select_dtypes(include=[np.number])

# Feature selection: keep the columns whose absolute correlation with
# Close_TAIEX over the training rows exceeds 0.5. The target correlates 1.0
# with itself, so it has to be removed explicitly; otherwise the models
# would be given the value they are supposed to predict.
correlation_matrix = numeric_data.iloc[:train_size].corr()
target_corr = correlation_matrix['Close_TAIEX'].abs().sort_values(ascending=False)
selected_features = target_corr[target_corr > 0.5].index.drop(
    'Close_TAIEX', errors='ignore'
)
if selected_features.empty:
    raise ValueError(
        'No feature has an absolute correlation above 0.5 with Close_TAIEX'
    )

# Feature scaling: fit the scaler on the training rows, then apply the same
# transformation to every row
feature_scaler = StandardScaler().fit(
    numeric_data[selected_features].iloc[:train_size]
)
scaled_data = feature_scaler.transform(numeric_data[selected_features])
scaled_data = pd.DataFrame(scaled_data, columns=selected_features)

# Split the data into training and test sets (first 80% / last 20% of rows)
train_data = scaled_data[:train_size]
test_data = scaled_data[train_size:]

train_labels = data['Close_TAIEX'][:train_size]
test_labels = data['Close_TAIEX'][train_size:]

# Train the random forest model. A fixed seed makes the bootstrap sampling
# and split selection repeatable, so reruns produce the same predictions.
rf_model = RandomForestRegressor(random_state=42)
rf_model.fit(train_data, train_labels)
rf_predictions = rf_model.predict(test_data)

# Train the support vector regression (SVR) model. SVR's default C and
# epsilon are tuned for targets of roughly unit scale, so the target is
# standardised for fitting; TransformedTargetRegressor inverts the
# transformation in predict(), so svr_predictions stay in the original
# Close_TAIEX units.
svr_model = TransformedTargetRegressor(
    regressor=SVR(),
    transformer=StandardScaler(),
)
svr_model.fit(train_data, train_labels)
svr_predictions = svr_model.predict(test_data)

# Plot the actual values against both models' predictions over the test
# period
fig, ax = plt.subplots(figsize=(12, 6))
test_dates = data['Date'][train_size:]

# Actual values
ax.plot(test_dates, test_labels, label='Actual', color='blue', linewidth=0.5)

# Random forest predictions
ax.plot(test_dates, rf_predictions, label='Random Forest Predictions',
        color='red', linewidth=0.5, alpha=0.7)

# SVR predictions
ax.plot(test_dates, svr_predictions, label='SVR Predictions',
        color='green', linewidth=0.5, alpha=0.7)

ax.set_title('Model Predictions vs Actual Values')
ax.set_xlabel('Date')
# The plotted labels and predictions are in the original Close_TAIEX units
# (only the input features are standardised), so the axis label must not
# describe them as scaled.
ax.set_ylabel('Close_TAIEX')
ax.legend()

# Put one major tick at the first day of each year and format it as a date
ax.xaxis.set_major_locator(mdates.YearLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y/%m/%d'))
fig.autofmt_xdate()  # rotate the date labels automatically

plt.show()