下載 MSCI TOP 20 股票的交易資料
!pip install pandas yfinance ta
import pandas as pd
import yfinance as yf
import ta
from google.colab import drive
# 連接 Google Drive
drive.mount('/content/drive')
# 定義 MSCI Taiwan Index 前 20 大成份股的股票代碼和名稱
msci_taiwan_20 = {
"2330.TW": "TSMC",
"2317.TW": "Hon Hai",
"2454.TW": "MediaTek",
"2382.TW": "Quanta",
"2881.TW": "Fubon FHC",
"2308.TW": "Delta",
"2412.TW": "Chunghwa Telecom",
"2882.TW": "Cathay FHC",
"2891.TW": "CTBC FHC",
"3711.TW": "ASE Technology",
"2303.TW": "UMC",
"6505.TW": "Formosa Petrochemical",
"2886.TW": "Mega FHC",
"6669.TW": "Wiwynn",
"1216.TW": "Uni-President",
"2884.TW": "E.SUN FHC",
"2885.TW": "Yuanta FHC",
"3008.TW": "Largan",
"5880.TW": "Taiwan Cooperative FHC",
"1303.TW": "Nan Ya Plastics"
}
# 定義下載數據的時間範圍
start_date = "2020-01-01"
end_date = "2024-06-30"
# 下載 Yahoo Finance 股票數據
def fetch_yahoo_data(ticker, start, end, stock_name=None):
df = yf.download(ticker, start=start, end=end)
df['ST_Code'] = ticker
if stock_name:
df['ST_Name'] = stock_name
df = df.rename(columns={"Adj Close": "Adj_Close"})
df.reset_index(inplace=True)
df['Date'] = pd.to_datetime(df['Date']).dt.strftime('%Y/%m/%d')
return df
# 從 Yahoo Finance 獲取 TAIEX 指數數據
def fetch_taiex_data(start, end):
df = yf.download("^TWII", start=start, end=end)
df = df.rename(columns={"Adj Close": "Adj_Close_TAIEX", "Close": "Close_TAIEX"})
df.reset_index(inplace=True)
df['Date'] = pd.to_datetime(df['Date']).dt.strftime('%Y/%m/%d')
return df
# 計算技術指標的函數
def calculate_indicators(df, market_close):
df = df.copy() # 確保不會修改原始數據框
df['Adj_Close'] = pd.to_numeric(df['Adj_Close'], errors='coerce')
df['MA7'] = df['Adj_Close'].rolling(window=7).mean() # 移動平均線 7 日
df['MA21'] = df['Adj_Close'].rolling(window=21).mean() # 移動平均線 21 日
df['MA50'] = df['Adj_Close'].rolling(window=50).mean() # 移動平均線 50 日
df['MA100'] = df['Adj_Close'].rolling(window=100).mean() # 移動平均線 100 日
# 布林帶
bb_indicator = ta.volatility.BollingerBands(close=df['Adj_Close'], window=20, window_dev=2)
df['Middle_Band'] = bb_indicator.bollinger_mavg()
df['Upper_Band'] = bb_indicator.bollinger_hband()
df['Lower_Band'] = bb_indicator.bollinger_lband()
df['Band_Width'] = df['Upper_Band'] - df['Lower_Band']
# 阿隆指標
aroon_indicator = ta.trend.AroonIndicator(high=df['High'], low=df['Low'], window=25)
df['Aroon_Up'] = aroon_indicator.aroon_up()
df['Aroon_Down'] = aroon_indicator.aroon_down()
# 商品渠道指數(CCI)
df['CCI20'] = ta.trend.cci(high=df['High'], low=df['Low'], close=df['Adj_Close'], window=20)
# Chande 動量擺動指標(CMO)
df['CMO14'] = calculate_cmo(df['Adj_Close'], window=14)
# MACD
macd_indicator = ta.trend.MACD(close=df['Adj_Close'])
df['MACD_Line'] = macd_indicator.macd()
df['Signal_Line'] = macd_indicator.macd_signal()
df['MACD_Histogram'] = macd_indicator.macd_diff()
# RSI
df['RSI7'] = ta.momentum.rsi(close=df['Adj_Close'], window=7)
df['RSI14'] = ta.momentum.rsi(close=df['Adj_Close'], window=14)
df['RSI21'] = ta.momentum.rsi(close=df['Adj_Close'], window=21)
# 隨機指標
stochastic_indicator = ta.momentum.StochasticOscillator(high=df['High'], low=df['Low'], close=df['Adj_Close'], window=14, smooth_window=3)
df['%K'] = stochastic_indicator.stoch()
df['%D'] = stochastic_indicator.stoch_signal()
# 威廉指標
df['WILLR14'] = ta.momentum.williams_r(high=df['High'], low=df['Low'], close=df['Adj_Close'], lbp=14)
# 平衡交易量(OBV)
df['OBV'] = ta.volume.on_balance_volume(close=df['Adj_Close'], volume=df['Volume'])
# 計算 Beta 指標
df['Market_Return'] = market_close.pct_change()
df['Stock_Return'] = df['Adj_Close'].pct_change()
df['Beta_60'] = df['Stock_Return'].rolling(window=60).cov(df['Market_Return']) / df['Market_Return'].rolling(window=60).var()
df['Beta_120'] = df['Stock_Return'].rolling(window=120).cov(df['Market_Return']) / df['Market_Return'].rolling(window=120).var()
return df
# 計算 Chande Momentum Oscillator (CMO) 的函數
def calculate_cmo(data, window):
delta = data.diff()
up = delta.where(delta > 0, 0.0)
down = -delta.where(delta < 0, 0.0)
sum_up = up.rolling(window=window).sum()
sum_down = down.rolling(window=window).sum()
cmo = 100 * (sum_up - sum_down) / (sum_up + sum_down)
return cmo
# 從 Yahoo Finance 獲取 TAIEX 指數數據
taiex_data = fetch_taiex_data(start_date, end_date)
market_close = taiex_data['Adj_Close_TAIEX']
# 下載 MSCI Taiwan 21 股票數據並添加技術指標
msci_data_list = []
for ticker, name in msci_taiwan_21.items():
try:
data = fetch_yahoo_data(ticker, start_date, end_date, stock_name=name)
data = calculate_indicators(data, market_close)
msci_data_list.append(data)
except Exception as e:
print(f"股票 {ticker} 的數據處理時出現錯誤:{e}")
# 合併所有 MSCI Taiwan 21 股票數據到一個數據框
msci_combined = pd.concat(msci_data_list, axis=0, ignore_index=True)
# 將 TAIEX 數據合併到 MSCI Taiwan 21 數據中
taiex_data = taiex_data.rename(columns={"Close_TAIEX": "Close_TAIEX", "Adj_Close_TAIEX": "Adj_Close_TAIEX"})
msci_combined = pd.merge(msci_combined, taiex_data[['Date', 'Close_TAIEX', 'Adj_Close_TAIEX']], on='Date', how='left')
# 調整列的順序
columns = ['Date', 'ST_Code', 'ST_Name'] + [col for col in msci_combined.columns if col not in ['Date', 'ST_Code', 'ST_Name']]
msci_combined = msci_combined[columns]
# 顯示合併後的數據的前五筆資料
print(msci_combined.head())
# 保存合併後的數據到 Google Drive 中的 CSV 文件
combined_output_file_path = '/content/drive/My Drive/stock_msci20/stock_20_0710.csv'
msci_combined.to_csv(combined_output_file_path, index=False, encoding='utf-8-sig')
print(f"合併後的 MSCI Taiwan 21 數據已保存到: {combined_output_file_path}")
Date ST_Code ST_Name Open High Low Close Adj_Close \
0 2020/01/02 2330.TW TSMC 332.5 339.0 332.5 339.0 307.796692
1 2020/01/03 2330.TW TSMC 344.0 345.0 335.5 339.5 308.250580
2 2020/01/06 2330.TW TSMC 333.0 334.5 332.0 332.0 301.441010
3 2020/01/07 2330.TW TSMC 332.5 333.0 326.5 329.5 299.171082
4 2020/01/08 2330.TW TSMC 325.0 333.0 325.0 329.5 299.171082
Volume MA7 ... %K %D WILLR14 OBV Market_Return Stock_Return \
0 31754120 NaN ... NaN NaN NaN 31754120 NaN NaN
1 41811268 NaN ... NaN NaN NaN 73565388 0.000822 0.001475
2 45343057 NaN ... NaN NaN NaN 28222331 -0.012970 -0.022091
3 50879181 NaN ... NaN NaN NaN -22656850 -0.006110 -0.007530
4 37567748 NaN ... NaN NaN NaN 14910898 -0.005321 0.000000
Beta_60 Beta_120 Close_TAIEX Adj_Close_TAIEX
0 NaN NaN 12100.480469 12100.480469
1 NaN NaN 12110.429688 12110.429688
2 NaN NaN 11953.360352 11953.360352
3 NaN NaN 11880.320312 11880.320312
4 NaN NaN 11817.099609 11817.099609
[5 rows x 37 columns]
合併後的 MSCI Taiwan 21 數據已保存到: /content/drive/My Drive/stock_msci20/processed_stock_data.csv
# 檢查缺失值
missing_values = msci_combined.isnull().sum()
print(missing_values)
Date 0
ST_Code 0
ST_Name 0
Open 0
High 0
Low 0
Close 0
Adj_Close 0
Volume 0
MA7 120
MA21 400
MA50 980
MA100 1980
Middle_Band 380
Upper_Band 380
Lower_Band 380
Band_Width 380
Aroon_Up 500
Aroon_Down 500
CCI20 380
CMO14 260
MACD_Line 500
Signal_Line 660
MACD_Histogram 660
RSI7 120
RSI14 260
RSI21 400
%K 260
%D 300
WILLR14 260
OBV 0
Market_Return 20
Stock_Return 20
Beta_60 1200
Beta_120 2400
Close_TAIEX 0
Adj_Close_TAIEX 0
dtype: int64
# 填補技術指標的缺失值
msci_combined['MA7'].fillna(method='bfill', inplace=True)
msci_combined['MA21'].fillna(method='bfill', inplace=True)
msci_combined['MA50'].fillna(method='bfill', inplace=True)
msci_combined['MA100'].fillna(method='bfill', inplace=True)
msci_combined['Middle_Band'].fillna(method='bfill', inplace=True)
msci_combined['Upper_Band'].fillna(method='bfill', inplace=True)
msci_combined['Lower_Band'].fillna(method='bfill', inplace=True)
msci_combined['Band_Width'].fillna(method='bfill', inplace=True)
msci_combined['Aroon_Up'].fillna(method='bfill', inplace=True)
msci_combined['Aroon_Down'].fillna(method='bfill', inplace=True)
msci_combined['CCI20'].fillna(method='bfill', inplace=True)
msci_combined['CMO14'].fillna(method='bfill', inplace=True)
msci_combined['MACD_Line'].fillna(method='bfill', inplace=True)
msci_combined['Signal_Line'].fillna(method='bfill', inplace=True)
msci_combined['MACD_Histogram'].fillna(method='bfill', inplace=True)
msci_combined['RSI7'].fillna(method='bfill', inplace=True)
msci_combined['RSI14'].fillna(method='bfill', inplace=True)
msci_combined['RSI21'].fillna(method='bfill', inplace=True)
msci_combined['%K'].fillna(method='bfill', inplace=True)
msci_combined['%D'].fillna(method='bfill', inplace=True)
msci_combined['WILLR14'].fillna(method='bfill', inplace=True)
msci_combined['Market_Return'].fillna(method='bfill', inplace=True)
msci_combined['Stock_Return'].fillna(method='bfill', inplace=True)
msci_combined['Beta_60'].fillna(method='bfill', inplace=True)
msci_combined['Beta_120'].fillna(method='bfill', inplace=True)
# 檢查缺失值
missing_values_after = msci_combined.isnull().sum()
print(missing_values_after)
# 保存處理後的數據到 CSV 文件
processed_output_file_path = '/content/drive/My Drive/stock_msci20/processed_stock_data_filled.csv'
msci_combined.to_csv(processed_output_file_path, index=False, encoding='utf-8-sig')
print(f"處理後的 MSCI Taiwan 21 數據已保存到: {processed_output_file_path}")
# 填補技術指標的缺失值之後的數據概覽
Open High Low Close Adj_Close \
count 13444.000000 13444.000000 13444.000000 13444.000000 13444.000000
mean 57.913840 58.354875 57.477498 57.932802 0.000000
std 33.427170 33.705493 33.140192 33.425061 1.000037
min 13.750000 13.850000 13.650000 13.650000 -1.391171
25% 25.700001 25.900000 25.600000 25.750000 -0.913565
50% 56.976191 57.463583 56.599998 57.006317 -0.049022
75% 80.900002 81.500000 80.300003 81.000000 0.679971
max 231.000000 236.000000 228.000000 234.000000 5.489825
Volume MA7 MA21 MA50 MA100 \
count 13444.000000 1.344400e+04 1.344400e+04 1.344400e+04 1.344400e+04
mean 0.000000 3.382530e-17 6.765060e-17 -2.029518e-16 1.014759e-16
std 1.000037 1.000037e+00 1.000037e+00 1.000037e+00 1.000037e+00
min -1.138741 -1.384310e+00 -1.377654e+00 -1.352509e+00 -1.332938e+00
25% -0.681806 -9.141113e-01 -9.107463e-01 -9.083360e-01 -9.004451e-01
50% -0.288653 -5.060366e-02 -5.426090e-02 -5.528471e-02 -4.067124e-02
75% 0.354388 6.775766e-01 6.748801e-01 6.853714e-01 6.859365e-01
max 7.462842 5.453941e+00 5.426634e+00 5.684700e+00 6.103182e+00
... %K %D WILLR14 OBV \
count ... 1.344400e+04 1.344400e+04 1.344400e+04 1.344400e+04
mean ... 4.228163e-17 -1.014759e-16 3.382530e-17 5.073795e-17
std ... 1.000037e+00 1.000037e+00 1.000037e+00 1.000037e+00
min ... -3.034973e+00 -3.280578e+00 -3.034973e+00 -1.715453e+00
25% ... -6.267550e-01 -6.355183e-01 -6.267550e-01 -8.129084e-01
50% ... 1.737074e-01 1.657172e-01 1.737074e-01 -2.442831e-01
75% ... 7.815844e-01 7.859595e-01 7.815844e-01 5.424447e-01
max ... 1.631922e+00 1.651208e+00 1.631922e+00 4.095976e+00
Market_Return Stock_Return Beta_60 Beta_120 Close_TAIEX \
count 1.344400e+04 1.344400e+04 1.344400e+04 1.344400e+04 13444.000000
mean -2.959714e-17 -1.929099e-17 -2.029518e-16 -1.014759e-16 15727.217465
std 1.000037e+00 1.000037e+00 1.000037e+00 1.000037e+00 2558.062858
min -3.362020e+00 -3.308643e+00 -2.253587e+00 -2.211252e+00 9218.669922
25% -5.912366e-01 -5.694901e-01 -6.288498e-01 -5.751071e-01 14177.459961
50% 2.561852e-02 -5.242189e-02 -1.065505e-01 -1.510994e-01 16179.559570
75% 6.191107e-01 5.713071e-01 5.751089e-01 6.417641e-01 17328.089844
max 3.065965e+00 3.344213e+00 3.386918e+00 3.084568e+00 23406.099609
Adj_Close_TAIEX
count 13444.000000
mean 15727.217465
std 2558.062858
min 9218.669922
25% 14177.459961
50% 16179.559570
75% 17328.089844
max 23406.099609
[8 rows x 34 columns]