NXS1.0_MSCI.TW.Top20.Download

NXS1.0_MSCI.TW.Top20.Download
Photo by Alina Grubnyak / Unsplash

下載 MSCI TOP 20 股票的交易資料

!pip install pandas yfinance ta
import pandas as pd
import yfinance as yf
import ta
from google.colab import drive

# 連接 Google Drive
drive.mount('/content/drive')

# 定義 MSCI Taiwan Index 前 20 大成份股的股票代碼和名稱
msci_taiwan_20 = {
    "2330.TW": "TSMC",
    "2317.TW": "Hon Hai",
    "2454.TW": "MediaTek",
    "2382.TW": "Quanta",
    "2881.TW": "Fubon FHC",
    "2308.TW": "Delta",
    "2412.TW": "Chunghwa Telecom",
    "2882.TW": "Cathay FHC",
    "2891.TW": "CTBC FHC",
    "3711.TW": "ASE Technology",
    "2303.TW": "UMC",
    "6505.TW": "Formosa Petrochemical",
    "2886.TW": "Mega FHC",
    "6669.TW": "Wiwynn",
    "1216.TW": "Uni-President",
    "2884.TW": "E.SUN FHC",
    "2885.TW": "Yuanta FHC",
    "3008.TW": "Largan",
    "5880.TW": "Taiwan Cooperative FHC",
    "1303.TW": "Nan Ya Plastics"
}

# 定義下載數據的時間範圍
start_date = "2020-01-01"
end_date = "2024-06-30"

# 下載 Yahoo Finance 股票數據
def fetch_yahoo_data(ticker, start, end, stock_name=None):
    df = yf.download(ticker, start=start, end=end)
    df['ST_Code'] = ticker
    if stock_name:
        df['ST_Name'] = stock_name
    df = df.rename(columns={"Adj Close": "Adj_Close"})
    df.reset_index(inplace=True)
    df['Date'] = pd.to_datetime(df['Date']).dt.strftime('%Y/%m/%d')
    return df

# 從 Yahoo Finance 獲取 TAIEX 指數數據
def fetch_taiex_data(start, end):
    df = yf.download("^TWII", start=start, end=end)
    df = df.rename(columns={"Adj Close": "Adj_Close_TAIEX", "Close": "Close_TAIEX"})
    df.reset_index(inplace=True)
    df['Date'] = pd.to_datetime(df['Date']).dt.strftime('%Y/%m/%d')
    return df

# 計算技術指標的函數
def calculate_indicators(df, market_close):
    df = df.copy()  # 確保不會修改原始數據框
    df['Adj_Close'] = pd.to_numeric(df['Adj_Close'], errors='coerce')

    df['MA7'] = df['Adj_Close'].rolling(window=7).mean()  # 移動平均線 7 日
    df['MA21'] = df['Adj_Close'].rolling(window=21).mean()  # 移動平均線 21 日
    df['MA50'] = df['Adj_Close'].rolling(window=50).mean()  # 移動平均線 50 日
    df['MA100'] = df['Adj_Close'].rolling(window=100).mean()  # 移動平均線 100 日

    # 布林帶
    bb_indicator = ta.volatility.BollingerBands(close=df['Adj_Close'], window=20, window_dev=2)
    df['Middle_Band'] = bb_indicator.bollinger_mavg()
    df['Upper_Band'] = bb_indicator.bollinger_hband()
    df['Lower_Band'] = bb_indicator.bollinger_lband()
    df['Band_Width'] = df['Upper_Band'] - df['Lower_Band']

    # 阿隆指標
    aroon_indicator = ta.trend.AroonIndicator(high=df['High'], low=df['Low'], window=25)
    df['Aroon_Up'] = aroon_indicator.aroon_up()
    df['Aroon_Down'] = aroon_indicator.aroon_down()

    # 商品渠道指數(CCI)
    df['CCI20'] = ta.trend.cci(high=df['High'], low=df['Low'], close=df['Adj_Close'], window=20)

    # Chande 動量擺動指標(CMO)
    df['CMO14'] = calculate_cmo(df['Adj_Close'], window=14)

    # MACD
    macd_indicator = ta.trend.MACD(close=df['Adj_Close'])
    df['MACD_Line'] = macd_indicator.macd()
    df['Signal_Line'] = macd_indicator.macd_signal()
    df['MACD_Histogram'] = macd_indicator.macd_diff()

    # RSI
    df['RSI7'] = ta.momentum.rsi(close=df['Adj_Close'], window=7)
    df['RSI14'] = ta.momentum.rsi(close=df['Adj_Close'], window=14)
    df['RSI21'] = ta.momentum.rsi(close=df['Adj_Close'], window=21)

    # 隨機指標
    stochastic_indicator = ta.momentum.StochasticOscillator(high=df['High'], low=df['Low'], close=df['Adj_Close'], window=14, smooth_window=3)
    df['%K'] = stochastic_indicator.stoch()
    df['%D'] = stochastic_indicator.stoch_signal()

    # 威廉指標
    df['WILLR14'] = ta.momentum.williams_r(high=df['High'], low=df['Low'], close=df['Adj_Close'], lbp=14)

    # 平衡交易量(OBV)
    df['OBV'] = ta.volume.on_balance_volume(close=df['Adj_Close'], volume=df['Volume'])

    # 計算 Beta 指標
    df['Market_Return'] = market_close.pct_change()
    df['Stock_Return'] = df['Adj_Close'].pct_change()
    df['Beta_60'] = df['Stock_Return'].rolling(window=60).cov(df['Market_Return']) / df['Market_Return'].rolling(window=60).var()
    df['Beta_120'] = df['Stock_Return'].rolling(window=120).cov(df['Market_Return']) / df['Market_Return'].rolling(window=120).var()

    return df

# 計算 Chande Momentum Oscillator (CMO) 的函數
def calculate_cmo(data, window):
    delta = data.diff()
    up = delta.where(delta > 0, 0.0)
    down = -delta.where(delta < 0, 0.0)
    sum_up = up.rolling(window=window).sum()
    sum_down = down.rolling(window=window).sum()
    cmo = 100 * (sum_up - sum_down) / (sum_up + sum_down)
    return cmo

# 從 Yahoo Finance 獲取 TAIEX 指數數據
taiex_data = fetch_taiex_data(start_date, end_date)
market_close = taiex_data['Adj_Close_TAIEX']

# 下載 MSCI Taiwan 21 股票數據並添加技術指標
msci_data_list = []

for ticker, name in msci_taiwan_21.items():
    try:
        data = fetch_yahoo_data(ticker, start_date, end_date, stock_name=name)
        data = calculate_indicators(data, market_close)
        msci_data_list.append(data)
    except Exception as e:
        print(f"股票 {ticker} 的數據處理時出現錯誤:{e}")

# 合併所有 MSCI Taiwan 21 股票數據到一個數據框
msci_combined = pd.concat(msci_data_list, axis=0, ignore_index=True)

# 將 TAIEX 數據合併到 MSCI Taiwan 21 數據中
taiex_data = taiex_data.rename(columns={"Close_TAIEX": "Close_TAIEX", "Adj_Close_TAIEX": "Adj_Close_TAIEX"})
msci_combined = pd.merge(msci_combined, taiex_data[['Date', 'Close_TAIEX', 'Adj_Close_TAIEX']], on='Date', how='left')

# 調整列的順序
columns = ['Date', 'ST_Code', 'ST_Name'] + [col for col in msci_combined.columns if col not in ['Date', 'ST_Code', 'ST_Name']]
msci_combined = msci_combined[columns]

# 顯示合併後的數據的前五筆資料
print(msci_combined.head())

# 保存合併後的數據到 Google Drive 中的 CSV 文件
combined_output_file_path = '/content/drive/My Drive/stock_msci20/stock_20_0710.csv'
msci_combined.to_csv(combined_output_file_path, index=False, encoding='utf-8-sig')
print(f"合併後的 MSCI Taiwan 21 數據已保存到: {combined_output_file_path}")



        Date  ST_Code ST_Name   Open   High    Low  Close   Adj_Close  \
0  2020/01/02  2330.TW    TSMC  332.5  339.0  332.5  339.0  307.796692   
1  2020/01/03  2330.TW    TSMC  344.0  345.0  335.5  339.5  308.250580   
2  2020/01/06  2330.TW    TSMC  333.0  334.5  332.0  332.0  301.441010   
3  2020/01/07  2330.TW    TSMC  332.5  333.0  326.5  329.5  299.171082   
4  2020/01/08  2330.TW    TSMC  325.0  333.0  325.0  329.5  299.171082   

     Volume  MA7  ...  %K  %D  WILLR14       OBV  Market_Return  Stock_Return  \
0  31754120  NaN  ... NaN NaN      NaN  31754120            NaN           NaN   
1  41811268  NaN  ... NaN NaN      NaN  73565388       0.000822      0.001475   
2  45343057  NaN  ... NaN NaN      NaN  28222331      -0.012970     -0.022091   
3  50879181  NaN  ... NaN NaN      NaN -22656850      -0.006110     -0.007530   
4  37567748  NaN  ... NaN NaN      NaN  14910898      -0.005321      0.000000   

   Beta_60  Beta_120   Close_TAIEX  Adj_Close_TAIEX  
0      NaN       NaN  12100.480469     12100.480469  
1      NaN       NaN  12110.429688     12110.429688  
2      NaN       NaN  11953.360352     11953.360352  
3      NaN       NaN  11880.320312     11880.320312  
4      NaN       NaN  11817.099609     11817.099609  

[5 rows x 37 columns]
合併後的 MSCI Taiwan 21 數據已保存到: /content/drive/My Drive/stock_msci20/processed_stock_data.csv




# 檢查缺失值
missing_values = msci_combined.isnull().sum()
print(missing_values)

Date                  0
ST_Code               0
ST_Name               0
Open                  0
High                  0
Low                   0
Close                 0
Adj_Close             0
Volume                0
MA7                 120
MA21                400
MA50                980
MA100              1980
Middle_Band         380
Upper_Band          380
Lower_Band          380
Band_Width          380
Aroon_Up            500
Aroon_Down          500
CCI20               380
CMO14               260
MACD_Line           500
Signal_Line         660
MACD_Histogram      660
RSI7                120
RSI14               260
RSI21               400
%K                  260
%D                  300
WILLR14             260
OBV                   0
Market_Return        20
Stock_Return         20
Beta_60            1200
Beta_120           2400
Close_TAIEX           0
Adj_Close_TAIEX       0
dtype: int64




# 填補技術指標的缺失值
msci_combined['MA7'].fillna(method='bfill', inplace=True)
msci_combined['MA21'].fillna(method='bfill', inplace=True)
msci_combined['MA50'].fillna(method='bfill', inplace=True)
msci_combined['MA100'].fillna(method='bfill', inplace=True)
msci_combined['Middle_Band'].fillna(method='bfill', inplace=True)
msci_combined['Upper_Band'].fillna(method='bfill', inplace=True)
msci_combined['Lower_Band'].fillna(method='bfill', inplace=True)
msci_combined['Band_Width'].fillna(method='bfill', inplace=True)
msci_combined['Aroon_Up'].fillna(method='bfill', inplace=True)
msci_combined['Aroon_Down'].fillna(method='bfill', inplace=True)
msci_combined['CCI20'].fillna(method='bfill', inplace=True)
msci_combined['CMO14'].fillna(method='bfill', inplace=True)
msci_combined['MACD_Line'].fillna(method='bfill', inplace=True)
msci_combined['Signal_Line'].fillna(method='bfill', inplace=True)
msci_combined['MACD_Histogram'].fillna(method='bfill', inplace=True)
msci_combined['RSI7'].fillna(method='bfill', inplace=True)
msci_combined['RSI14'].fillna(method='bfill', inplace=True)
msci_combined['RSI21'].fillna(method='bfill', inplace=True)
msci_combined['%K'].fillna(method='bfill', inplace=True)
msci_combined['%D'].fillna(method='bfill', inplace=True)
msci_combined['WILLR14'].fillna(method='bfill', inplace=True)
msci_combined['Market_Return'].fillna(method='bfill', inplace=True)
msci_combined['Stock_Return'].fillna(method='bfill', inplace=True)
msci_combined['Beta_60'].fillna(method='bfill', inplace=True)
msci_combined['Beta_120'].fillna(method='bfill', inplace=True)

# 檢查缺失值
missing_values_after = msci_combined.isnull().sum()
print(missing_values_after)

# 保存處理後的數據到 CSV 文件
processed_output_file_path = '/content/drive/My Drive/stock_msci20/processed_stock_data_filled.csv'
msci_combined.to_csv(processed_output_file_path, index=False, encoding='utf-8-sig')
print(f"處理後的 MSCI Taiwan 21 數據已保存到: {processed_output_file_path}")
# 填補技術指標的缺失值之後的數據概覽
               Open          High           Low         Close     Adj_Close  \
count  13444.000000  13444.000000  13444.000000  13444.000000  13444.000000   
mean      57.913840     58.354875     57.477498     57.932802      0.000000   
std       33.427170     33.705493     33.140192     33.425061      1.000037   
min       13.750000     13.850000     13.650000     13.650000     -1.391171   
25%       25.700001     25.900000     25.600000     25.750000     -0.913565   
50%       56.976191     57.463583     56.599998     57.006317     -0.049022   
75%       80.900002     81.500000     80.300003     81.000000      0.679971   
max      231.000000    236.000000    228.000000    234.000000      5.489825   

             Volume           MA7          MA21          MA50         MA100  \
count  13444.000000  1.344400e+04  1.344400e+04  1.344400e+04  1.344400e+04   
mean       0.000000  3.382530e-17  6.765060e-17 -2.029518e-16  1.014759e-16   
std        1.000037  1.000037e+00  1.000037e+00  1.000037e+00  1.000037e+00   
min       -1.138741 -1.384310e+00 -1.377654e+00 -1.352509e+00 -1.332938e+00   
25%       -0.681806 -9.141113e-01 -9.107463e-01 -9.083360e-01 -9.004451e-01   
50%       -0.288653 -5.060366e-02 -5.426090e-02 -5.528471e-02 -4.067124e-02   
75%        0.354388  6.775766e-01  6.748801e-01  6.853714e-01  6.859365e-01   
max        7.462842  5.453941e+00  5.426634e+00  5.684700e+00  6.103182e+00   

       ...            %K            %D       WILLR14           OBV  \
count  ...  1.344400e+04  1.344400e+04  1.344400e+04  1.344400e+04   
mean   ...  4.228163e-17 -1.014759e-16  3.382530e-17  5.073795e-17   
std    ...  1.000037e+00  1.000037e+00  1.000037e+00  1.000037e+00   
min    ... -3.034973e+00 -3.280578e+00 -3.034973e+00 -1.715453e+00   
25%    ... -6.267550e-01 -6.355183e-01 -6.267550e-01 -8.129084e-01   
50%    ...  1.737074e-01  1.657172e-01  1.737074e-01 -2.442831e-01   
75%    ...  7.815844e-01  7.859595e-01  7.815844e-01  5.424447e-01   
max    ...  1.631922e+00  1.651208e+00  1.631922e+00  4.095976e+00   

       Market_Return  Stock_Return       Beta_60      Beta_120   Close_TAIEX  \
count   1.344400e+04  1.344400e+04  1.344400e+04  1.344400e+04  13444.000000   
mean   -2.959714e-17 -1.929099e-17 -2.029518e-16 -1.014759e-16  15727.217465   
std     1.000037e+00  1.000037e+00  1.000037e+00  1.000037e+00   2558.062858   
min    -3.362020e+00 -3.308643e+00 -2.253587e+00 -2.211252e+00   9218.669922   
25%    -5.912366e-01 -5.694901e-01 -6.288498e-01 -5.751071e-01  14177.459961   
50%     2.561852e-02 -5.242189e-02 -1.065505e-01 -1.510994e-01  16179.559570   
75%     6.191107e-01  5.713071e-01  5.751089e-01  6.417641e-01  17328.089844   
max     3.065965e+00  3.344213e+00  3.386918e+00  3.084568e+00  23406.099609   

       Adj_Close_TAIEX  
count     13444.000000  
mean      15727.217465  
std        2558.062858  
min        9218.669922  
25%       14177.459961  
50%       16179.559570  
75%       17328.089844  
max       23406.099609  

[8 rows x 34 columns]