**TAIEX.s30_Feature_Engineering.特徵工程**
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
# Load the source CSV and expose the MSCI close price under the name 'Close',
# which is the column every indicator below reads.
file_path = '/content/drive/My Drive/MSCI_Taiwan_30_data_with_OBV.csv'
data = pd.read_csv(file_path).rename(columns={'Close_MSCI': 'Close'})
# 1. Simple moving averages (SMA) of the close price.
# Rows that do not yet have a full window stay NaN at this point.
close_prices = data['Close']
for ma_column, ma_window in (('MA10', 10), ('MA50', 50)):
    data[ma_column] = close_prices.rolling(window=ma_window).mean()
# 2. Relative Strength Index (RSI)
def compute_rsi(data: pd.DataFrame, window: int = 14) -> pd.Series:
    """Return the RSI of ``data['Close']`` using simple rolling means.

    Args:
        data: Frame that holds a ``'Close'`` column. It is only read.
        window: Number of rows in each rolling average.

    Returns:
        A Series aligned with ``data``. The first ``window - 1`` entries
        are NaN because the rolling mean needs a full window.
    """
    delta = data['Close'].diff(1)
    # Split the row-to-row change into its upward and downward parts.
    # The first diff is NaN; both comparisons are False for it, so that row
    # counts as zero gain and zero loss inside the first full window.
    avg_gain = delta.where(delta > 0, 0).rolling(window=window).mean()
    avg_loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()
    # A window with gains but no losses ends up at exactly 100; a window with
    # no movement at all is 0/0 and stays NaN.
    rs = avg_gain / avg_loss
    return 100 - (100 / (1 + rs))
# Attach the RSI (default window) to the frame as the 'RSI' column.
rsi_values = compute_rsi(data)
data['RSI'] = rsi_values
# 3. Moving Average Convergence Divergence (MACD)
def compute_macd(
    data: pd.DataFrame,
    fast: int = 12,
    slow: int = 26,
    signal: int = 9,
) -> tuple[pd.Series, pd.Series, pd.Series]:
    """Return the MACD line, its signal line and the histogram.

    All three are derived from ``data['Close']`` with recursive
    (``adjust=False``) exponential moving averages.

    Args:
        data: Frame that holds a ``'Close'`` column. It is only read.
        fast: Span of the fast EMA.
        slow: Span of the slow EMA.
        signal: Span of the EMA applied to the MACD line.

    Returns:
        ``(macd, signal_line, macd_hist)``, each aligned with ``data``.
    """
    close = data['Close']
    fast_ema = close.ewm(span=fast, adjust=False).mean()
    slow_ema = close.ewm(span=slow, adjust=False).mean()
    macd = fast_ema - slow_ema
    signal_line = macd.ewm(span=signal, adjust=False).mean()
    # Histogram: distance of the MACD line from its own smoothed version.
    macd_hist = macd - signal_line
    return macd, signal_line, macd_hist
# Attach the three MACD series (default spans) to the frame, in the order
# compute_macd returns them.
for macd_column, macd_series in zip(('MACD', 'MACD_Signal', 'MACD_Hist'), compute_macd(data)):
    data[macd_column] = macd_series
# 4. Impute missing values: NaNs in each numeric column are replaced by that
# column's mean (this includes the warm-up NaNs of the rolling indicators).
numeric_cols = data.select_dtypes(include=[np.number]).columns
numeric_view = data[numeric_cols]
data[numeric_cols] = numeric_view.fillna(numeric_view.mean())

# 5. Feature selection (numeric columns only): keep every column whose
# absolute correlation with 'Close_TAIEX' exceeds 0.5.
# NOTE(review): 'Close_TAIEX' itself passes this filter and ends up among the
# selected columns -- confirm that downstream code expects it there.
numeric_data = data.select_dtypes(include=[np.number])
correlation_matrix = numeric_data.corr()
target_corr = (
    correlation_matrix['Close_TAIEX']
    .abs()
    .sort_values(ascending=False)
)
is_strong = target_corr > 0.5
selected_features = target_corr[is_strong].index

# 6. Feature scaling: standardise the selected columns and rebuild a frame
# that carries their names.
scaler = StandardScaler()
scaled_data = pd.DataFrame(
    scaler.fit_transform(data[selected_features]),
    columns=selected_features,
)

# Show the first rows of the preprocessed data.
print(scaled_data.head())

# Save the scaled, selected columns to a new file (the index is not written).
new_file_path_with_indicators = '/content/drive/My Drive/MSCI_Taiwan_30_data_with_indicators_scaled.csv'
scaled_data.to_csv(new_file_path_with_indicators, index=False)
Close_TAIEX OBV
0 -1.394161 1.700066
1 -1.394161 1.699456
2 -1.394161 1.699519
3 -1.394161 1.699702
4 -1.394161 1.699622