TKU_1105

1105_01_Dataset_Down_F

cy.lu

16 Oct 2024 • 7 min read

Step 0: 在 Google Drive 建立一個資料夾

/Semicon_Analysis/

Step 1: Mount Google Drive in Colab

# Mount Google Drive so the notebook can reach files under /content/drive
from google.colab import drive
drive.mount('/content/drive')

# Check Drive access: list the contents of the Semicon_Analysis project folder
!ls /content/drive/My\ Drive/Semicon_Analysis/

Step 2: Coding

import yfinance as yf
import pandas as pd
import os
from datetime import datetime

# Mount Google Drive in Colab (this import only resolves inside a Colab runtime)
from google.colab import drive
drive.mount('/content/drive')

# Folder in Google Drive where every CSV produced by this notebook is saved
# (adjust path as needed)
data_path = '/content/drive/My Drive/Semicon_Analysis/'

# exist_ok=True creates the folder when missing and is a no-op when it is
# already there, replacing the separate exists-then-create check.
os.makedirs(data_path, exist_ok=True)

# Display name -> ticker symbol passed to yfinance.
# Insertion order is the order in which companies are fetched and written out.
tickers = {
    "Nvidia": "NVDA",
    "TSMC": "2330.TW",  # listed on the Taiwan Stock Exchange
    "Broadcom": "AVGO",
    "ASML": "ASML",
    "Samsung": "005930.KS",  # listed on the Korea Stock Exchange
    "AMD": "AMD",
    "Qualcomm": "QCOM",
    "Texas Instruments": "TXN",
    "Applied Materials": "AMAT",
    "Arm Holdings": "ARM",  # IPO in 2023
    "Micron Technology": "MU",
    "Analog Devices": "ADI",
    "Lam Research": "LRCX",
    "KLA": "KLAC",
    "Intel": "INTC",
    "SK Hynix": "000660.KS",  # listed on the Korea Stock Exchange
    "Tokyo Electron": "8035.T",  # listed on the Tokyo Stock Exchange
    "Synopsys": "SNPS",
    "Marvell Technology": "MRVL",
    "MediaTek": "2454.TW",  # listed on the Taiwan Stock Exchange
}

# Hand-maintained company profiles, keyed by the same display names used in
# `tickers`. get_company_data() prefers these values over anything fetched
# from yfinance whenever the company name is present here.
#
# Each entry carries the same five fields:
#   'Funding Year'    - int year.
#                       NOTE(review): presumably the *founding* year (the
#                       yfinance fallback for this field is commented as
#                       "Company founding year"); the key spelling is kept
#                       because it is also the CSV column name read back later.
#   'Employees'       - int headcount (static snapshot, not fetched).
#   'Major Products'  - comma-separated free text.
#   'Applications'    - comma-separated free text.
#   'Major Customers' - comma-separated free text.
predefined_data = {
    'Nvidia': {
        'Funding Year': 1993,
        'Employees': 29600,
        'Major Products': 'GPU, AI Accelerators, Gaming Graphics Cards',
        'Applications': 'AI, Gaming, Data Center, Autonomous Vehicles',
        'Major Customers': 'Tesla, Google, Microsoft, AWS'
    },
    'TSMC': {
        'Funding Year': 1987,
        'Employees': 65152,
        'Major Products': 'Semiconductor Manufacturing, Foundry Services',
        'Applications': 'Mobile, Consumer Electronics, Automotive, High-Performance Computing',
        'Major Customers': 'Apple, AMD, Qualcomm, Nvidia'
    },
    'Broadcom': {
        'Funding Year': 1991,
        'Employees': 20000,
        'Major Products': 'Network Chips, Wi-Fi Chips, Storage Controllers',
        'Applications': 'Networking, Wireless Communication, Data Storage',
        'Major Customers': 'Apple, Cisco, Google, Facebook'
    },
    'ASML': {
        'Funding Year': 1984,
        'Employees': 41505,
        'Major Products': 'Lithography Systems',
        'Applications': 'Semiconductor Manufacturing',
        'Major Customers': 'Intel, TSMC, Samsung'
    },
    'Samsung': {
        'Funding Year': 1969,
        'Employees': 127172,
        'Major Products': 'Memory Chips, Displays, Smartphones',
        'Applications': 'Mobile, Consumer Electronics, Data Center, Automotive',
        'Major Customers': 'Apple, Qualcomm, Nvidia, Google'
    },
    'AMD': {
        'Funding Year': 1969,
        'Employees': 26000,
        'Major Products': 'CPUs, GPUs, APUs',
        'Applications': 'Gaming, Data Center, PCs, Servers',
        'Major Customers': 'Microsoft, Sony, HP, Dell'
    },
    'Qualcomm': {
        'Funding Year': 1985,
        'Employees': 50000,
        'Major Products': 'Mobile Processors, Modems, RF Front-End',
        'Applications': 'Mobile, Automotive, IoT, 5G Networks',
        'Major Customers': 'Apple, Samsung, Xiaomi, Vivo'
    },
    'Intel': {
        'Funding Year': 1968,
        'Employees': 124800,
        'Major Products': 'Core Processors, Xeon Processors',
        'Applications': 'PCs, Servers, Data Centers, AI',
        'Major Customers': 'Dell, HP, Lenovo, Amazon'
    },
    'MediaTek': {
        'Funding Year': 1997,
        'Employees': 19600,
        'Major Products': 'Mobile Chipsets, 5G Modems, IoT Solutions',
        'Applications': 'Mobile, IoT, Consumer Electronics',
        'Major Customers': 'Xiaomi, Oppo, Vivo, Amazon'
    },
    'Texas Instruments': {
        'Funding Year': 1930,
        'Employees': 34000,
        'Major Products': 'Analog Chips, Embedded Processors',
        'Applications': 'Industrial, Automotive, Personal Electronics',
        'Major Customers': 'Boeing, Honeywell, Ford, Bosch'
    },
    'Applied Materials': {
        'Funding Year': 1967,
        'Employees': 35200,
        'Major Products': 'Semiconductor Equipment, Display Equipment',
        'Applications': 'Semiconductor Manufacturing, Display Manufacturing',
        'Major Customers': 'TSMC, Intel, Samsung, Micron'
    },
    'Arm Holdings': {
        'Funding Year': 1990,
        'Employees': 7320,
        'Major Products': 'ARM Architecture IP, Microprocessors',
        'Applications': 'Mobile, Embedded, IoT, Automotive',
        'Major Customers': 'Apple, Samsung, Qualcomm, Huawei'
    },
    'Micron Technology': {
        'Funding Year': 1978,
        'Employees': 48000,
        'Major Products': 'Memory Chips (DRAM, NAND)',
        'Applications': 'Mobile, Data Center, Automotive, Consumer Electronics',
        'Major Customers': 'Dell, HP, Apple, Cisco'
    },
    'Analog Devices': {
        'Funding Year': 1965,
        'Employees': 26000,
        'Major Products': 'Analog Chips, Mixed-Signal Chips, DSPs',
        'Applications': 'Automotive, Industrial, Communication, Healthcare',
        'Major Customers': 'Tesla, General Electric, Ford, Siemens'
    },
    'Lam Research': {
        'Funding Year': 1980,
        'Employees': 17450,
        'Major Products': 'Semiconductor Manufacturing Equipment',
        'Applications': 'Semiconductor Manufacturing',
        'Major Customers': 'TSMC, Samsung, Micron, SK Hynix'
    },
    'KLA': {
        'Funding Year': 1975,
        'Employees': 15000,
        'Major Products': 'Semiconductor Process Control Systems',
        'Applications': 'Semiconductor Manufacturing, Process Control',
        'Major Customers': 'Intel, TSMC, Samsung, GlobalFoundries'
    },
    'SK Hynix': {
        'Funding Year': 1983,
        'Employees': 31894,
        'Major Products': 'Memory Chips (DRAM, NAND)',
        'Applications': 'Mobile, Data Center, Consumer Electronics',
        'Major Customers': 'Apple, Dell, Lenovo, HP'
    },
    'Tokyo Electron': {
        'Funding Year': 1963,
        'Employees': 17702,
        'Major Products': 'Semiconductor Manufacturing Equipment',
        'Applications': 'Semiconductor Manufacturing',
        'Major Customers': 'TSMC, Samsung, Micron, Intel'
    },
    'Synopsys': {
        'Funding Year': 1986,
        'Employees': 20300,
        'Major Products': 'Electronic Design Automation (EDA), Semiconductor IP',
        'Applications': 'Semiconductor Design, Embedded Systems',
        'Major Customers': 'Intel, TSMC, Samsung, Broadcom'
    },
    'Marvell Technology': {
        'Funding Year': 1995,
        'Employees': 6511,
        'Major Products': 'Storage Controllers, Network Processors, SoCs',
        'Applications': 'Data Center, Networking, Storage Solutions',
        'Major Customers': 'Dell, Cisco, HP, Huawei'
    }
}

# Define a function to fetch company data from yfinance
def get_company_data(ticker, company_name):
    """Build one profile record for a company.

    Fields read from ``yf.Ticker(ticker).info`` are combined with the
    descriptive fields stored under ``predefined_data[company_name]``. When
    the company has no predefined entry, employee count and founding year
    are read from ``info`` and the descriptive fields get placeholders.

    Args:
        ticker: Symbol handed to ``yf.Ticker``.
        company_name: Key looked up in ``predefined_data``.

    Returns:
        A dict with the keys ``Date``, ``Company Name``, ``Ticker``,
        ``Industry``, ``Country``, ``City``, ``Website``, ``Employees``,
        ``Funding Year``, ``Major Products``, ``Applications`` and
        ``Major Customers`` (in that order), or ``None`` when any exception
        occurs; the error is printed rather than raised.
    """
    # Fields supplied either by predefined_data or by the fallback below,
    # listed in the order the predefined entry is read.
    profile_fields = (
        'Funding Year',
        'Employees',
        'Major Products',
        'Applications',
        'Major Customers',
    )

    try:
        info = yf.Ticker(ticker).info

        # Date on which this record was captured
        snapshot_date = datetime.today().strftime('%Y-%m-%d')

        if company_name not in predefined_data:
            # No predefined entry: take what yfinance offers, placeholders otherwise
            profile = {
                'Funding Year': info.get('founded', None),
                'Employees': info.get('fullTimeEmployees', None),
                'Major Products': 'Technology',
                'Applications': 'Technology',
                'Major Customers': 'N/A',
            }
        else:
            curated = predefined_data[company_name]
            profile = {field: curated[field] for field in profile_fields}

        record = {
            'Date': snapshot_date,
            'Company Name': info.get('longName', ticker),
            'Ticker': ticker,
            'Industry': info.get('industry', 'Semiconductors'),
            'Country': info.get('country', None),
            'City': info.get('city', None),
            'Website': info.get('website', None),
        }
        # Appended in this order so the resulting column order stays the same
        for field in ('Employees', 'Funding Year', 'Major Products',
                      'Applications', 'Major Customers'):
            record[field] = profile[field]
        return record
    except Exception as e:
        print(f"Error fetching data for {ticker}: {e}")
        return None

# Gather one record per company; lookups that failed (falsy result) are skipped
company_data = []
for company, ticker in tickers.items():
    print(f"Fetching data for {company} ({ticker})...")
    data = get_company_data(ticker, company)
    if not data:
        continue
    company_data.append(data)

# Destination CSV inside the Google Drive project folder
csv_file = os.path.join(data_path, 'semiconductor_companies_data_final.csv')

# One row per company, written without the DataFrame index
df = pd.DataFrame(company_data)
df.to_csv(csv_file, index=False)

print(f"Data saved to {csv_file}")

查詢 Nvidia 市值

import yfinance as yf
import pandas as pd

# Define the company ticker for Nvidia
ticker = 'NVDA'

# Fetch daily prices for Nvidia from 2024-09-01 to 2024-10-16
# (the end date passed to yfinance is one day past the last day wanted)
nvidia_data = yf.download(ticker, start='2024-09-01', end='2024-10-17')

# NOTE(review): newer yfinance releases may return (field, ticker) MultiIndex
# columns even for a single ticker; keep only the field level so that
# nvidia_data['Close'] is a Series. Confirm against the installed version.
if isinstance(nvidia_data.columns, pd.MultiIndex):
    nvidia_data.columns = nvidia_data.columns.get_level_values(0)

# Get company information for Nvidia (to find shares outstanding)
nvidia_info = yf.Ticker(ticker).info
shares_outstanding = nvidia_info.get('sharesOutstanding')
if not shares_outstanding:
    # Fail with an explicit message instead of a bare KeyError
    raise ValueError(f"Shares outstanding not available for {ticker}.")

# Calculate market cap (Market Cap = Close Price * Shares Outstanding).
# The same share count is applied to every date in the range, so the
# series is an approximation of the historical market cap.
nvidia_data['Market Cap (USD)'] = nvidia_data['Close'] * shares_outstanding

# Select relevant columns
nvidia_market_cap = nvidia_data[['Market Cap (USD)']]

# Save to CSV (optional, in Google Drive if mounted)
nvidia_market_cap.to_csv('/content/drive/My Drive/Semicon_Analysis/Nvidia_Market_Cap_2024-09-01_to_2024-10-16.csv')

# Display the data (last expression of the cell, rendered by the notebook)
nvidia_market_cap.head()

安裝匯率轉換套件（forex-python）

# Install forex-python, which provides the CurrencyRates converter imported below
!pip install forex-python

取得資料集

# Destination CSV for the historical market-cap dataset (assigned again, with
# the same value, in the script below)
output_file = '/content/drive/My Drive/Semicon_Analysis/semiconductor_companies_data_final_1975-2024.csv'

import yfinance as yf
import pandas as pd
import os
from forex_python.converter import CurrencyRates
from datetime import datetime

# Initialize forex converter
cr = CurrencyRates()

# Define file paths
input_file = '/content/drive/My Drive/Semicon_Analysis/semiconductor_companies_data_final.csv'
output_file = '/content/drive/My Drive/Semicon_Analysis/semiconductor_companies_data_final_1975-2024.csv'

# Read the existing CSV file with company data
company_data = pd.read_csv(input_file)
tickers = company_data['Ticker'].tolist()

# Listing currency keyed by the exchange suffix of the ticker. Deriving the
# currency from the suffix (instead of listing tickers one by one) keeps
# every ticker of an exchange consistent: with a per-ticker list,
# '000660.KS' and '2454.TW' were missing and silently treated as USD.
exchange_currencies = {
    'TW': 'TWD',  # Taiwan Stock Exchange - Taiwan Dollar
    'KS': 'KRW',  # Korea Stock Exchange - Korean Won
    'T': 'JPY',   # Tokyo Stock Exchange - Japanese Yen
    # Add more exchange suffixes with non-USD currencies if needed
}

# Ticker -> non-USD currency; tickers absent from this map default to USD
currencies = {}
for ticker in tickers:
    _, dot, suffix = ticker.rpartition('.')
    if dot and suffix in exchange_currencies:
        currencies[ticker] = exchange_currencies[suffix]

# Define the start and end dates for historical data
start_date = '1975-01-01'
end_date = datetime.today().strftime('%Y-%m-%d')

# (currency, date) -> USD rate. Tickers that share a currency cover largely
# the same dates, so each rate is looked up once and then reused.
_usd_rate_cache = {}


def _usd_rate(currency, date):
    """Return the `currency` -> USD rate for `date`, caching each lookup."""
    key = (currency, date)
    if key not in _usd_rate_cache:
        _usd_rate_cache[key] = cr.get_rate(currency, 'USD', date)
    return _usd_rate_cache[key]


# Per-ticker frames are collected here and concatenated once at the end;
# concatenating inside the loop re-copies all accumulated rows every time.
market_cap_frames = []

# Loop through each ticker and get the historical market cap data
for ticker in tickers:
    print(f"Fetching data for {ticker}...")
    try:
        # Download historical data
        stock_data = yf.download(ticker, start=start_date, end=end_date)

        # NOTE(review): newer yfinance releases may return (field, ticker)
        # MultiIndex columns even for a single ticker; keep the field level
        # only so the column lookups and the CSV header stay flat.
        if isinstance(stock_data.columns, pd.MultiIndex):
            stock_data.columns = stock_data.columns.get_level_values(0)

        if stock_data.empty:
            print(f"No price data returned for {ticker}.")
            continue

        company_info = yf.Ticker(ticker).info
        shares_outstanding = company_info.get('sharesOutstanding', None)
        if not shares_outstanding:
            print(f"Shares outstanding not available for {ticker}.")
            continue

        # Calculate market cap in local currency. The same share count is
        # applied to every historical date, so this is an approximation.
        stock_data['Market Cap'] = stock_data['Close'] * shares_outstanding

        # Check if the company is in a different currency
        currency = currencies.get(ticker, 'USD')  # Default to USD if not listed
        stock_data['Currency'] = currency  # Add currency column

        if currency != 'USD':
            # Convert market cap to USD using daily exchange rates.
            # NOTE(review): confirm forex-python can serve this currency for
            # the whole date range; a failed lookup raises and the ticker is
            # skipped by the except clause below.
            usd_rates = pd.Series(
                [_usd_rate(currency, day) for day in stock_data.index],
                index=stock_data.index,
            )
            stock_data['Market Cap (USD)'] = stock_data['Market Cap'] * usd_rates
        else:
            stock_data['Market Cap (USD)'] = stock_data['Market Cap']

        # Add Close Price column (local currency)
        stock_data['Close Price'] = stock_data['Close']

        # Add ticker to the data
        stock_data['Ticker'] = ticker
        market_cap_frames.append(
            stock_data[['Ticker', 'Close Price', 'Currency', 'Market Cap (USD)']]
        )
    except Exception as e:
        print(f"Error fetching data for {ticker}: {e}")

# Combine all tickers; fall back to an empty frame when nothing was fetched
# (pd.concat raises on an empty list)
all_market_caps = pd.concat(market_cap_frames) if market_cap_frames else pd.DataFrame()

# Reset index and save the data to a new CSV file
all_market_caps.reset_index(inplace=True)
all_market_caps.to_csv(output_file, index=False)

print(f"Market Cap data saved to {output_file}")

合併數據集

# Destination CSV for the merged dataset (assigned again, with the same value,
# in the script below)
output_file = '/content/drive/My Drive/Semicon_Analysis/semiconductor_merged_1015.csv'

import pandas as pd

# Input and output file paths
file_1 = '/content/drive/My Drive/Semicon_Analysis/semiconductor_companies_data_final_1975-2024.csv'
file_2 = '/content/drive/My Drive/Semicon_Analysis/semiconductor_companies_data_final.csv'
output_file = '/content/drive/My Drive/Semicon_Analysis/semiconductor_merged_1015.csv'

# Load both CSV files
df1 = pd.read_csv(file_1)  # historical rows: ticker, close price, currency, market cap
df2 = pd.read_csv(file_2)  # one row of descriptive fields per company

# Print the first rows of each file to confirm both were read
for heading, frame in (
    ("File 1 (semiconductor_companies_data_final_1975-2024.csv) 前幾行資料:", df1),
    ("File 2 (semiconductor_companies_data_final.csv) 前幾行資料:", df2),
):
    print(heading)
    print(frame.head())

# Mark the price and currency columns of the first file as local-currency values
df1 = df1.rename(columns={'Currency': 'Currency_Local', 'Close Price': 'Close Price_Local'})

# Company columns carried over from the second file
company_columns = [
    'Company Name', 'Ticker', 'Industry', 'Country', 'City', 'Website',
    'Funding Year', 'Major Products', 'Applications', 'Major Customers',
]
df2_selected = df2[company_columns]

# Left-join on Ticker so every historical row is kept
merged_df = df1.merge(df2_selected, on='Ticker', how='left')

# Write the merged result to a new CSV file
merged_df.to_csv(output_file, index=False)

print(f"Merged data saved to {output_file}")