1105_01_Dataset_Down_F
Step 0: 在 Google Drive 建立一個資料夾
/Semicon_Analysis/
Step 1: Mount Google Drive in Colab
# Colab-only: expose Google Drive under /content/drive so the
# /Semicon_Analysis/ folder created in Step 0 is reachable from this notebook.
from google.colab import drive
drive.mount('/content/drive')
#Check Drive Access
!ls /content/drive/My\ Drive/Semicon_Analysis/
![](https://thepearl.ghost.io/content/images/2024/10/---2024-10-16-15.22.38.png)
Step 2: Coding
import yfinance as yf
import pandas as pd
import os
from datetime import datetime
# Mount Google Drive in Colab (only works when running in Colab)
from google.colab import drive
drive.mount('/content/drive')

# Folder in Google Drive that receives the CSV written by this cell
# (adjust the path as needed).
data_path = '/content/drive/My Drive/Semicon_Analysis/'
# exist_ok=True creates the folder only when it is missing, without the
# separate existence check (which could race with another writer).
os.makedirs(data_path, exist_ok=True)
# Display name -> Yahoo Finance ticker symbol.
# Insertion order is the order in which companies are fetched and written out.
# Symbols with a suffix are listed outside the US (see the inline notes).
tickers = {
    'Nvidia': 'NVDA',
    'TSMC': '2330.TW',               # Taiwan Stock Exchange
    'Broadcom': 'AVGO',
    'ASML': 'ASML',
    'Samsung': '005930.KS',          # Korea Stock Exchange
    'AMD': 'AMD',
    'Qualcomm': 'QCOM',
    'Texas Instruments': 'TXN',
    'Applied Materials': 'AMAT',
    'Arm Holdings': 'ARM',           # IPO in 2023
    'Micron Technology': 'MU',
    'Analog Devices': 'ADI',
    'Lam Research': 'LRCX',
    'KLA': 'KLAC',
    'Intel': 'INTC',
    'SK Hynix': '000660.KS',         # Korea Stock Exchange
    'Tokyo Electron': '8035.T',      # Tokyo Stock Exchange
    'Synopsys': 'SNPS',
    'Marvell Technology': 'MRVL',
    'MediaTek': '2454.TW',           # Taiwan Stock Exchange
}
# Hand-curated company profiles, keyed by the same display names as `tickers`.
# Every profile carries exactly the fields named in _PROFILE_FIELDS; each row
# below is (company name, then one value per field, in that order).
_PROFILE_FIELDS = (
    'Funding Year',
    'Employees',
    'Major Products',
    'Applications',
    'Major Customers',
)

_PROFILE_ROWS = (
    ('Nvidia', 1993, 29600,
     'GPU, AI Accelerators, Gaming Graphics Cards',
     'AI, Gaming, Data Center, Autonomous Vehicles',
     'Tesla, Google, Microsoft, AWS'),
    ('TSMC', 1987, 65152,
     'Semiconductor Manufacturing, Foundry Services',
     'Mobile, Consumer Electronics, Automotive, High-Performance Computing',
     'Apple, AMD, Qualcomm, Nvidia'),
    ('Broadcom', 1991, 20000,
     'Network Chips, Wi-Fi Chips, Storage Controllers',
     'Networking, Wireless Communication, Data Storage',
     'Apple, Cisco, Google, Facebook'),
    ('ASML', 1984, 41505,
     'Lithography Systems',
     'Semiconductor Manufacturing',
     'Intel, TSMC, Samsung'),
    ('Samsung', 1969, 127172,
     'Memory Chips, Displays, Smartphones',
     'Mobile, Consumer Electronics, Data Center, Automotive',
     'Apple, Qualcomm, Nvidia, Google'),
    ('AMD', 1969, 26000,
     'CPUs, GPUs, APUs',
     'Gaming, Data Center, PCs, Servers',
     'Microsoft, Sony, HP, Dell'),
    ('Qualcomm', 1985, 50000,
     'Mobile Processors, Modems, RF Front-End',
     'Mobile, Automotive, IoT, 5G Networks',
     'Apple, Samsung, Xiaomi, Vivo'),
    ('Intel', 1968, 124800,
     'Core Processors, Xeon Processors',
     'PCs, Servers, Data Centers, AI',
     'Dell, HP, Lenovo, Amazon'),
    ('MediaTek', 1997, 19600,
     'Mobile Chipsets, 5G Modems, IoT Solutions',
     'Mobile, IoT, Consumer Electronics',
     'Xiaomi, Oppo, Vivo, Amazon'),
    ('Texas Instruments', 1930, 34000,
     'Analog Chips, Embedded Processors',
     'Industrial, Automotive, Personal Electronics',
     'Boeing, Honeywell, Ford, Bosch'),
    ('Applied Materials', 1967, 35200,
     'Semiconductor Equipment, Display Equipment',
     'Semiconductor Manufacturing, Display Manufacturing',
     'TSMC, Intel, Samsung, Micron'),
    ('Arm Holdings', 1990, 7320,
     'ARM Architecture IP, Microprocessors',
     'Mobile, Embedded, IoT, Automotive',
     'Apple, Samsung, Qualcomm, Huawei'),
    ('Micron Technology', 1978, 48000,
     'Memory Chips (DRAM, NAND)',
     'Mobile, Data Center, Automotive, Consumer Electronics',
     'Dell, HP, Apple, Cisco'),
    ('Analog Devices', 1965, 26000,
     'Analog Chips, Mixed-Signal Chips, DSPs',
     'Automotive, Industrial, Communication, Healthcare',
     'Tesla, General Electric, Ford, Siemens'),
    ('Lam Research', 1980, 17450,
     'Semiconductor Manufacturing Equipment',
     'Semiconductor Manufacturing',
     'TSMC, Samsung, Micron, SK Hynix'),
    ('KLA', 1975, 15000,
     'Semiconductor Process Control Systems',
     'Semiconductor Manufacturing, Process Control',
     'Intel, TSMC, Samsung, GlobalFoundries'),
    ('SK Hynix', 1983, 31894,
     'Memory Chips (DRAM, NAND)',
     'Mobile, Data Center, Consumer Electronics',
     'Apple, Dell, Lenovo, HP'),
    ('Tokyo Electron', 1963, 17702,
     'Semiconductor Manufacturing Equipment',
     'Semiconductor Manufacturing',
     'TSMC, Samsung, Micron, Intel'),
    ('Synopsys', 1986, 20300,
     'Electronic Design Automation (EDA), Semiconductor IP',
     'Semiconductor Design, Embedded Systems',
     'Intel, TSMC, Samsung, Broadcom'),
    ('Marvell Technology', 1995, 6511,
     'Storage Controllers, Network Processors, SoCs',
     'Data Center, Networking, Storage Solutions',
     'Dell, Cisco, HP, Huawei'),
)

# Expand the table into {company name: {field: value}}; each company gets its
# own independent dict, and both company and field order follow the table.
predefined_data = {
    name: dict(zip(_PROFILE_FIELDS, values))
    for name, *values in _PROFILE_ROWS
}
# Define a function to fetch company data from yfinance
def get_company_data(ticker, company_name):
    """Build one company-profile record for *ticker*.

    General fields (name, industry, country, city, website) are read from the
    yfinance ``info`` mapping. Employees, funding year, products, applications
    and customers come from ``predefined_data`` when *company_name* has an
    entry there; otherwise employees/funding year are looked up in ``info``
    and the remaining fields get placeholder text.

    Args:
        ticker: Yahoo Finance symbol, e.g. ``'NVDA'``.
        company_name: Key used to look the company up in ``predefined_data``.

    Returns:
        A dict with one entry per output column (``'Date'`` first), or
        ``None`` when anything raised; the error is printed, not re-raised.
    """
    try:
        info = yf.Ticker(ticker).info

        # Stamp the record with the day it was collected.
        today = datetime.today().strftime('%Y-%m-%d')

        if company_name not in predefined_data:
            # No curated profile: fall back to whatever `info` offers.
            # NOTE(review): confirm yfinance actually exposes a 'founded'
            # key; if it does not, Funding Year is None on this path.
            funding_year = info.get('founded')
            employees = info.get('fullTimeEmployees')
            major_products = 'Technology'
            applications = 'Technology'
            major_customers = 'N/A'
        else:
            curated = predefined_data[company_name]
            (funding_year, employees, major_products,
             applications, major_customers) = (
                curated[field]
                for field in ('Funding Year', 'Employees', 'Major Products',
                              'Applications', 'Major Customers')
            )

        # Key order here is the column order of the resulting CSV.
        record = {
            'Date': today,
            'Company Name': info.get('longName', ticker),
            'Ticker': ticker,
            'Industry': info.get('industry', 'Semiconductors'),
            'Country': info.get('country'),
            'City': info.get('city'),
            'Website': info.get('website'),
            'Employees': employees,
            'Funding Year': funding_year,
            'Major Products': major_products,
            'Applications': applications,
            'Major Customers': major_customers,
        }
        return record
    except Exception as error:
        # Best effort: report the failure and let the caller skip this ticker.
        print(f"Error fetching data for {ticker}: {error}")
        return None
# Destination CSV inside the Drive folder prepared above
csv_file = os.path.join(data_path, 'semiconductor_companies_data_final.csv')

# Fetch one record per company; lookups that failed (falsy result) are left out
company_data = []
for company, ticker in tickers.items():
    print(f"Fetching data for {company} ({ticker})...")
    data = get_company_data(ticker, company)
    if not data:
        continue
    company_data.append(data)

# One row per successfully fetched company, written without the row index
df = pd.DataFrame(company_data)
df.to_csv(csv_file, index=False)
print(f"Data saved to {csv_file}")
![](https://thepearl.ghost.io/content/images/2024/10/---2024-10-16-15.23.50.png)
查詢 Nvidia 市值
import yfinance as yf
import pandas as pd
# Ticker symbol queried in this cell (Nvidia)
ticker = 'NVDA'
# Download Nvidia price history for 2024-09-01 .. 2024-10-16.
# `end` is passed as 2024-10-17 -- presumably exclusive in yfinance, so the
# last row is 2024-10-16; confirm against the yfinance documentation.
# NOTE(review): newer yfinance releases may return MultiIndex columns even for
# a single ticker, which would change what ['Close'] yields below -- verify
# with the installed version.
nvidia_data = yf.download(ticker, start='2024-09-01', end='2024-10-17')
# Company metadata, needed only for the share count.
nvidia_info = yf.Ticker(ticker).info
shares_outstanding = nvidia_info['sharesOutstanding'] # raises KeyError when the field is absent
# Market Cap = Close Price * Shares Outstanding.
# The single share count fetched above is applied to every day in the range,
# so each daily figure is an approximation based on that one share count.
nvidia_data['Market Cap (USD)'] = nvidia_data['Close'] * shares_outstanding
# Keep only the computed column (double brackets -> still a DataFrame)
nvidia_market_cap = nvidia_data[['Market Cap (USD)']]
# Save to CSV in Google Drive (requires the Drive mount from Step 1)
nvidia_market_cap.to_csv('/content/drive/My Drive/Semicon_Analysis/Nvidia_Market_Cap_2024-09-01_to_2024-10-16.csv')
# Last expression of the cell: the notebook renders the first rows
nvidia_market_cap.head()
匯率服務器
!pip install forex-python
![](https://thepearl.ghost.io/content/images/2024/10/---2024-10-16-15.40.20.png)
取得資料集
import yfinance as yf
import pandas as pd
import os
from forex_python.converter import CurrencyRates
from datetime import datetime

# Builds a long table (one row per ticker per trading day) with the close
# price, its currency and an estimated USD market cap, and writes it to Drive.

# Initialize forex converter
cr = CurrencyRates()

# Define file paths
input_file = '/content/drive/My Drive/Semicon_Analysis/semiconductor_companies_data_final.csv'
output_file = '/content/drive/My Drive/Semicon_Analysis/semiconductor_companies_data_final_1975-2024.csv'

# Read the existing CSV file with company data (written by the Step 2 cell)
company_data = pd.read_csv(input_file)

# Tickers to process, in file order
tickers = company_data['Ticker'].tolist()

# Quote currency for every ticker that does NOT trade in USD; anything not
# listed here is treated as USD. SK Hynix and MediaTek are included because
# they are listed in Korea and Taiwan respectively -- without them their
# local-currency market caps would be written out unconverted as USD.
currencies = {
    '2330.TW': 'TWD',    # TSMC - Taiwan Dollar
    '2454.TW': 'TWD',    # MediaTek - Taiwan Dollar
    '005930.KS': 'KRW',  # Samsung - Korean Won
    '000660.KS': 'KRW',  # SK Hynix - Korean Won
    '8035.T': 'JPY',     # Tokyo Electron - Japanese Yen
    # Add more tickers with non-USD currencies if needed
}

# Define the start and end dates for historical data
start_date = '1975-01-01'
end_date = datetime.today().strftime('%Y-%m-%d')

# (currency, date) -> USD rate. Every lookup is a call into the forex service,
# so each pair is fetched once and shared by all tickers quoted in that currency.
_usd_rate_cache = {}


def _usd_rate(currency, day):
    """Return the *currency* -> USD rate for *day*, fetching it at most once."""
    key = (currency, day)
    if key not in _usd_rate_cache:
        # Converting an amount of 1 yields the rate itself.
        _usd_rate_cache[key] = cr.convert(currency, 'USD', 1, day)
    return _usd_rate_cache[key]


# Per-ticker frames are collected here and concatenated once after the loop,
# instead of re-copying the growing result on every iteration.
market_cap_frames = []

# Loop through each ticker and get the historical market cap data
for ticker in tickers:
    print(f"Fetching data for {ticker}...")
    try:
        # Download historical data
        stock_data = yf.download(ticker, start=start_date, end=end_date)
        # Some yfinance versions return (field, ticker) MultiIndex columns even
        # for a single symbol; keep only the field level so 'Close' is a
        # plain column. This is a no-op for flat columns.
        if isinstance(stock_data.columns, pd.MultiIndex):
            stock_data.columns = stock_data.columns.get_level_values(0)

        company_info = yf.Ticker(ticker).info
        shares_outstanding = company_info.get('sharesOutstanding', None)
        if not shares_outstanding:
            print(f"Shares outstanding not available for {ticker}.")
            continue

        # Market cap in local currency. The single share count fetched above
        # is applied to the whole history, so each daily figure is an
        # approximation based on that one share count.
        stock_data['Market Cap'] = stock_data['Close'] * shares_outstanding

        # Check if the company is quoted in a different currency
        currency = currencies.get(ticker, 'USD')  # Default to USD if not listed
        stock_data['Currency'] = currency  # Add currency column

        if currency != 'USD':
            # Convert market cap to USD using the exchange rate of each day.
            # NOTE(review): the forex source may not cover every currency or
            # every date in this range -- confirm. A failed lookup raises and
            # the whole ticker is skipped by the except clause below.
            stock_data['Market Cap (USD)'] = [
                amount * _usd_rate(currency, day)
                for day, amount in stock_data['Market Cap'].items()
            ]
        else:
            stock_data['Market Cap (USD)'] = stock_data['Market Cap']

        # Add Close Price column
        stock_data['Close Price'] = stock_data['Close']
        # Add ticker to the data
        stock_data['Ticker'] = ticker

        market_cap_frames.append(
            stock_data[['Ticker', 'Close Price', 'Currency', 'Market Cap (USD)']]
        )
    except Exception as e:
        # Best effort: report and move on to the next ticker.
        print(f"Error fetching data for {ticker}: {e}")

# pd.concat rejects an empty list, so fall back to an empty frame when every
# ticker failed.
all_market_caps = pd.concat(market_cap_frames) if market_cap_frames else pd.DataFrame()

# Reset index (turns the date index into a column) and save to a new CSV file
all_market_caps.reset_index(inplace=True)
all_market_caps.to_csv(output_file, index=False)
print(f"Market Cap data saved to {output_file}")
![](https://thepearl.ghost.io/content/images/2024/10/---2024-10-16-15.45.30.png)
合併數據集
import pandas as pd

# File locations: daily market-cap table, company profile table, merged result
file_1 = '/content/drive/My Drive/Semicon_Analysis/semiconductor_companies_data_final_1975-2024.csv'
file_2 = '/content/drive/My Drive/Semicon_Analysis/semiconductor_companies_data_final.csv'
output_file = '/content/drive/My Drive/Semicon_Analysis/semiconductor_merged_1015.csv'

# Load both CSV files
df1 = pd.read_csv(file_1)  # daily rows written by the historical market-cap cell
df2 = pd.read_csv(file_2)  # one row of company attributes per ticker

# Show the first rows of each table to confirm the reads worked
print("File 1 (semiconductor_companies_data_final_1975-2024.csv) 前幾行資料:")
print(df1.head())
print("File 2 (semiconductor_companies_data_final.csv) 前幾行資料:")
print(df2.head())

# Rename the price/currency columns of the first table before merging
df1 = df1.rename(columns={'Currency': 'Currency_Local', 'Close Price': 'Close Price_Local'})

# Company attributes to carry over from the second table
company_columns = [
    'Company Name', 'Ticker', 'Industry', 'Country', 'City', 'Website',
    'Funding Year', 'Major Products', 'Applications', 'Major Customers',
]
df2_selected = df2[company_columns]

# Left join on Ticker: every daily row is kept and gains the company columns
merged_df = df1.merge(df2_selected, on='Ticker', how='left')

# Write the merged table to a new CSV file
merged_df.to_csv(output_file, index=False)
print(f"Merged data saved to {output_file}")
![](https://thepearl.ghost.io/content/images/2024/10/---2024-10-16-15.44.49.png)