**TAIEX.ML.s11_Scatter Plot.散布圖(1)
'**有些變數的視覺化趨勢就蠻明顯的(見下圖)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
# Load the data
file_path = '/content/drive/MyDrive/dataset/MSCI_Taiwan_30_data_with_OBV.csv'
data = pd.read_csv(file_path)
# Convert 'Date' column to datetime
data['Date'] = pd.to_datetime(data['Date'], format='%Y/%m/%d')
# Define the target variable
target_variable = 'Close_TAIEX'
# Function to create scatter plot matrix by variable
def scatter_plot_matrix_by_variable():
variables = [col for col in data.columns if col not in ['Date', 'ST_Code', 'ST_Name', target_variable]]
for variable in variables:
plt.figure(figsize=(25, 25))
unique_stock_codes = data['ST_Code'].unique()
for i, stock_code in enumerate(unique_stock_codes):
stock_data = data[data['ST_Code'] == stock_code]
taiex_data = data[['Date', target_variable]].drop_duplicates()
aligned_data = pd.merge(stock_data, taiex_data, on='Date', suffixes=('', '_TAIEX'))
aligned_data = aligned_data.replace([np.inf, -np.inf], np.nan).dropna()
if aligned_data.shape[0] > 0 and variable in aligned_data.columns:
correlation, p_value = pearsonr(aligned_data[target_variable], aligned_data[variable])
plt.subplot((len(unique_stock_codes) + 4) // 5, 5, i + 1)
sns.scatterplot(x=aligned_data[target_variable], y=aligned_data[variable])
sns.regplot(x=aligned_data[target_variable], y=aligned_data[variable], scatter=False, color='red', ci=95, line_kws={'linestyle': 'dashed'})
stock_name = aligned_data['ST_Name'].iloc[0] # Get the stock name from the first row
plt.title(f'{stock_code} ({stock_name})\n(r={correlation:.2f}, p={p_value:.2e})')
plt.xlabel('TAIEX')
plt.ylabel(variable)
plt.suptitle(f'Scatter Plot Matrix for {variable}', fontsize=20)
plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.savefig(f'/content/drive/My Drive/scatter_plot_matrix_{variable}.png')
plt.show()
# Generate scatter plot matrix by variable
scatter_plot_matrix_by_variable()