如何獲取 TAIEX 成分股代碼

如何獲取 TAIEX 成分股代碼
Photo by Thomas Tucker / Unsplash
!pip install pandas requests gspread oauth2client

import pandas as pd
import requests
import gspread
from google.colab import auth
from oauth2client.client import GoogleCredentials
from google.colab import drive

# Mount Google Drive so the resulting CSV can be written to it.
drive.mount('/content/drive')

# Scrape the TWSE ISIN securities table (strMode=2); it is decoded as Big5.
url = "https://isin.twse.com.tw/isin/C_public.jsp?strMode=2"
df_list = pd.read_html(url, encoding='big5')
df = df_list[0]

# Print the raw frame to inspect what was scraped.
print("初始數據框:")
print(df.head())

# The first scraped row carries the real column names; promote it to the
# header and drop it from the data.
df.columns = df.iloc[0]
df = df[1:]

# Keep listed common stocks only.
# The previous filter searched for '上市' inside the code-and-name column,
# which holds values shaped like "<code> <name>" (see the regex below), so it
# could not select stock rows. The market label is matched on its own column.
# Comparing stringified cells keeps missing values from breaking the mask.
# NOTE(review): relies on the table exposing '市場別' and 'CFICode' columns
# and on common stocks carrying a CFI code starting with 'ES' (ISO 10962:
# equities / ordinary shares) — confirm against the live page.
is_listed = df['市場別'].astype(str).str.strip() == '上市'
is_common_stock = df['CFICode'].astype(str).str.strip().str.startswith('ES')
df = df[is_listed & is_common_stock]

# Print the cleaned frame to inspect it.
print("清理後的數據框:")
print(df.head())

# Split "code<whitespace>name" into two columns. In Python 3 str patterns,
# \s also matches the full-width space. Building a new frame from the
# extraction result avoids assigning columns into a slice of the original
# frame (chained-assignment warning).
df = df['有價證券代號及名稱'].str.extract(r'(\d+)\s+(.+)')
df.columns = ['Ticker', 'Name']

# Rows the pattern did not match have no ticker; do not write them out.
df = df.dropna(subset=['Ticker'])

# Fail loudly instead of silently saving an empty list of constituents.
if df.empty:
    raise RuntimeError(
        "No listed common stocks were parsed from the TWSE ISIN page; "
        "its layout may have changed."
    )

# Show every constituent.
print("TAIEX 成分股代碼及名稱:")
print(df.to_string(index=False))

# Save the result as a CSV on Google Drive (utf-8-sig so spreadsheet tools
# detect the encoding of the Chinese names).
output_file_path = '/content/drive/My Drive/TAIEX_constituents.csv'
df.to_csv(output_file_path, index=False, encoding='utf-8-sig')

print(f"TAIEX 成分股代碼已保存到: {output_file_path}")


!pip install yfinance

import yfinance as yf
import os

def download_stock_data(tickers, start_date, end_date, folder_path):
    """Download price history for each ticker and save one CSV per ticker.

    Every ticker gets the ``.TW`` suffix appended before it is passed to
    ``yf.download``. The result is written to ``<folder_path>/<ticker>.csv``.
    Tickers for which nothing comes back are reported and skipped, so no
    empty CSV files are left behind.

    Args:
        tickers: Iterable of ticker codes without the ``.TW`` suffix.
        start_date: Forwarded to ``yf.download`` as ``start``.
        end_date: Forwarded to ``yf.download`` as ``end``.
        folder_path: Directory for the CSV files; created when missing.
    """
    # exist_ok=True replaces the check-then-create sequence, which could fail
    # if the directory appeared between the check and the creation.
    os.makedirs(folder_path, exist_ok=True)

    for ticker in tickers:
        print(f"Downloading {ticker} data...")
        stock_data = yf.download(f"{ticker}.TW", start=start_date, end=end_date)

        # Skip tickers with no data rather than writing a header-only file
        # and announcing it as saved.
        if stock_data is None or stock_data.empty:
            print(f"No data returned for {ticker}; skipping.")
            continue

        file_path = os.path.join(folder_path, f"{ticker}.csv")
        stock_data.to_csv(file_path)
        print(f"Saved {ticker} data to {file_path}")

# Read the TAIEX constituent tickers back from the CSV file.
# Ticker is forced to str: with type inference an all-digit column is parsed
# as integers, which drops leading zeros (e.g. "0050" would become 50).
taiex_df = pd.read_csv(
    '/content/drive/My Drive/TAIEX_constituents.csv',
    dtype={'Ticker': str},
)
# Rows without a ticker would otherwise be requested as "nan.TW".
taiex_tickers = taiex_df['Ticker'].dropna().tolist()

# Date range of the history to download.
# NOTE(review): yfinance documents `end` as exclusive — confirm the last
# intended trading day is still covered.
start_date = "2010-01-01"
end_date = "2023-12-31"

# Download the data and save it to Google Drive.
folder_path = "/content/drive/My Drive/StockData"
download_stock_data(taiex_tickers, start_date, end_date, folder_path)