Close_TAIEX -N.Scatter Plot Matrix_0630
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr
# Mount Google Drive so the CSV path below is reachable (this only works inside Colab).
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Load the data
file_path = '/content/drive/My Drive/MSCI_Taiwan_30_data.csv'
data = pd.read_csv(file_path)

# Select only numerical columns; pearsonr cannot handle text columns.
numerical_data = data.select_dtypes(include=[np.number])

# Treat +/-inf as missing, then fill every gap with the mean of its own column.
# NOTE(review): mean imputation pulls correlations towards zero and inflates the
# sample size behind the p-values; it is kept here so the printed table stays
# comparable with earlier runs.
numerical_data = numerical_data.replace([np.inf, -np.inf], np.nan)
numerical_data = numerical_data.fillna(numerical_data.mean())

# Calculate Pearson correlation with target variable 'Close_TAIEX'
target_variable = 'Close_TAIEX'
if target_variable not in numerical_data.columns:
    # Fail early with a readable message instead of a bare KeyError inside the loop.
    raise KeyError(f"Numeric column '{target_variable}' not found in {file_path}")

target_series = numerical_data[target_variable]
correlation_results = {}
for column in numerical_data.columns:
    if column == target_variable:
        continue
    correlation, p_value = pearsonr(target_series, numerical_data[column])
    correlation_results[column] = {'Pearson Correlation': correlation, 'P-value': p_value}

# Convert results to DataFrame for better visualization, strongest positive correlation first.
correlation_df = pd.DataFrame.from_dict(correlation_results, orient='index')
correlation_df = correlation_df.sort_values(by='Pearson Correlation', ascending=False)
print("Pearson Correlation with Close_TAIEX and corresponding P-values:")
print(correlation_df)

# Plot scatter plots for each explanatory variable vs. Close_TAIEX.
# The grid is sized from the number of variables: a fixed 6x6 grid makes
# plt.subplot raise as soon as there are more than 36 explanatory columns.
n_cols = 6
n_rows = (len(correlation_df) + n_cols - 1) // n_cols
# Keep the original 20x20 inch canvas for a six-row grid and scale the height otherwise.
plt.figure(figsize=(20, 20 * n_rows / 6))
for position, (column, stats) in enumerate(correlation_df.iterrows(), start=1):
    plt.subplot(n_rows, n_cols, position)
    sns.scatterplot(x=target_series, y=numerical_data[column])
    plt.title(f'{column} vs. {target_variable}\n(r={stats["Pearson Correlation"]:.2f}, p={stats["P-value"]:.2e})')
    plt.xlabel(target_variable)
    plt.ylabel(column)
plt.tight_layout()
plt.show()
import pandas as pd import yfinance as yf import ta from datetime import datetime
Mounted at /content/drive
Pearson Correlation with Close_TAIEX and corresponding P-values:
Pearson Correlation P-value
RSI21 0.181235 1.788855e-239
RSI14 0.149536 7.892582e-163
CMO14 0.097049 2.915690e-69
RSI7 0.094208 2.352703e-65
Aroon Up 0.092640 3.003308e-63
CCI20 0.085391 5.606595e-54
MACD Line 0.080482 3.944215e-48
Signal Line 0.079971 1.530058e-47
%D 0.065723 1.223437e-32
%K 0.062314 1.670302e-29
WILLR14 0.062314 1.670302e-29
Market Return 0.053534 3.417693e-22
Adj_Close 0.040832 1.508200e-13
Lower Band 0.025166 5.331465e-06
Stock Return 0.024956 6.379107e-06
Low 0.022923 3.390795e-05
Close_MSCI 0.022754 3.870897e-05
Open 0.022309 5.473519e-05
MA7 0.022306 5.485074e-05
High 0.022106 6.395461e-05
Middle Band 0.021395 1.092130e-04
MA21 0.021350 1.128899e-04
MA50 0.020800 1.688435e-04
MA100 0.018208 9.919776e-04
Upper Band 0.018121 1.049533e-03
MACD Histogram 0.012237 2.691003e-02
Volume -0.014191 1.028059e-02
Band Width -0.021713 8.609264e-05
Aroon Down -0.081898 8.808426e-50
Beta_120 -0.124150 1.815258e-112
Beta_60 -0.164698 1.474482e-197
import pandas as pd import yfinance as yf import ta from datetime import datetime
![](https://thepearl.ghost.io/content/images/2024/06/data-src-image-23b836d5-fcf9-4299-a01a-4331031087da.png)
將相關技術指標分為微弱相關、正相關和負相關的表格,包括它們的相關係數(r)和 p 值:
| 類別 | 指標 | r 值 | p 值 |
|---|---|---|---|
| 微弱相關 | RSI21 | -0.01 | 7.19e-239 |
| | RSI14 | 0.03 | 7.19e-168 |
| | RSI7 | -0.09 | 2.73e-56 |
| | Aroon Up | 0.03 | 9.03e-63 |
| | Market Return | 0.09 | 2.43e-22 |
| | Adj Close | 0.04 | 1.15e-13 |
| | Lower Band | 0.03 | 5.33e-06 |
| | Stock Return | 0.02 | 3.16e-06 |
| | Low | 0.02 | 3.39e-05 |
| | Close MSCI | 0.02 | 3.78e-05 |
| | Open | 0.02 | 5.47e-05 |
| | MA7 | 0.02 | 4.95e-05 |
| | High | 0.02 | 6.46e-05 |
| | Middle Band | 0.02 | 9.96e-04 |
| | MA21 | 0.02 | 1.34e-04 |
| | MA50 | 0.02 | 1.96e-04 |
| | MA10 | 0.02 | 9.92e-04 |
| | Upper Band | 0.02 | 5.05e-03 |
| | MACD Histogram | 0.01 | 3.10e-02 |
| | Band Width | -0.02 | 8.16e-05 |
| 正相關 | RSI14 | 0.03 | 7.19e-168 |
| | CCI20 | 0.09 | 9.15e-54 |
| | MACD Line | 0.05 | 1.11e-45 |
| | Signal Line | 0.05 | 1.71e-42 |
| | WILLR14 | 0.09 | 1.67e-29 |
| 負相關 | CMO14 | -0.19 | 2.92e-69 |
| | RSI7 | -0.09 | 2.73e-56 |
| | %D | -0.07 | 1.12e-29 |
| | %K | -0.06 | 1.47e-29 |
| | Aroon Down | -0.08 | 8.18e-50 |
| | Beta 120 | -0.12 | 1.82e-112 |
變數分群的討論
![](https://thepearl.ghost.io/content/images/2024/06/data-src-image-77801a36-0e8b-4f8e-b00b-14ebfa9e4dc7.png)
![](https://thepearl.ghost.io/content/images/2024/06/data-src-image-a6874c53-a155-4c16-9a9a-382f13fb062b.png)
OPEN_High_Low_Close
![](https://thepearl.ghost.io/content/images/2024/06/data-src-image-8e1eef19-28cd-44df-8060-b9c7b8b3c7ba.png)
![](https://thepearl.ghost.io/content/images/2024/06/data-src-image-cc6ad849-7703-4055-9f75-0007fe540a66.png)
![](https://thepearl.ghost.io/content/images/2024/06/data-src-image-7c228056-b1e3-4e9a-ade8-20746445b402.png)
![](https://thepearl.ghost.io/content/images/2024/06/data-src-image-4df18683-5470-4174-a783-f86447e0b31c.png)
![](https://thepearl.ghost.io/content/images/2024/06/data-src-image-dcee124f-af8b-4ea4-8764-082bf39205e3.png)
![](https://thepearl.ghost.io/content/images/2024/06/data-src-image-15f59443-6960-43c7-9c14-e11385444970.png)
![](https://thepearl.ghost.io/content/images/2024/06/data-src-image-47e70236-c7ba-4321-a894-11357624b690.png)
![](https://thepearl.ghost.io/content/images/2024/06/data-src-image-8622f6a7-568e-4aff-883a-8a542bc6695a.png)
![](https://thepearl.ghost.io/content/images/2024/06/data-src-image-0b11be40-986c-491c-8cb8-81d183c456e2.png)
![](https://thepearl.ghost.io/content/images/2024/06/data-src-image-cafc2623-6032-49a2-bfea-70a2660cc8f2.png)
![](https://thepearl.ghost.io/content/images/2024/06/data-src-image-10b29bf0-0157-47cb-a784-868f76c08093.png)
![](https://thepearl.ghost.io/content/images/2024/06/data-src-image-9c4f79ae-f76d-4edd-95bc-291d1394d66b.png)
![](https://thepearl.ghost.io/content/images/2024/06/data-src-image-7a327b79-9088-4d45-804f-e27dc0f78cd4.png)
![](https://thepearl.ghost.io/content/images/2024/06/data-src-image-fc95618a-37ea-473a-91f8-61d07641477e.png)
![](https://thepearl.ghost.io/content/images/2024/06/data-src-image-7b481460-42ab-4e77-a380-bad6f5badc47.png)
![](https://thepearl.ghost.io/content/images/2024/06/data-src-image-79110c05-e663-4901-9149-f15500fb3750.png)
![](https://thepearl.ghost.io/content/images/2024/06/data-src-image-3d468d31-cb13-4e30-9eec-edd50a7df2e5.png)
![](https://thepearl.ghost.io/content/images/2024/06/data-src-image-9d774264-1fd8-41ce-a70c-9ba222733d59.png)
![](https://thepearl.ghost.io/content/images/2024/06/data-src-image-b43f6648-8d4a-4432-8cc2-65ae827ea982.png)
![](https://thepearl.ghost.io/content/images/2024/06/data-src-image-6effec87-dbbe-4710-899f-4e6e25a87b1c.png)
![](https://thepearl.ghost.io/content/images/2024/06/data-src-image-f57ddc74-279e-492b-86ab-bc9a0a13c084.png)
![](https://thepearl.ghost.io/content/images/2024/06/data-src-image-e9fa4af8-6e69-4115-8190-700285205873.png)
![](https://thepearl.ghost.io/content/images/2024/06/data-src-image-c03d2f1b-77be-4abc-b8e4-ddd924b8077f.png)
![](https://thepearl.ghost.io/content/images/2024/06/data-src-image-eef2101d-1e25-47b5-b9d8-7e250cc45744.png)
Code
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr
# Colab-only: attach Google Drive so the CSV path below resolves.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Column that every other variable is plotted and correlated against.
target_variable = 'Close_TAIEX'

# Read the CSV and parse the 'Date' strings (written as YYYY/MM/DD) into datetimes.
file_path = '/content/drive/My Drive/MSCI_Taiwan_30_data.csv'
data = pd.read_csv(file_path).assign(
    Date=lambda frame: pd.to_datetime(frame['Date'], format='%Y/%m/%d')
)
# Function to create scatter plot matrix by variable
def scatter_plot_matrix_by_variable():
    """Draw one figure per variable with a per-stock scatter plot against the target.

    Reads the module-level ``data`` frame and ``target_variable`` column name.
    For every numeric column other than 'ST_Code' and the target, a 5-column
    grid is drawn with one panel per distinct 'ST_Code'. Each panel shows the
    variable against the target, a dashed red regression line, and the Pearson
    r and p-value in its title. Every figure is saved as
    '/content/drive/My Drive/scatter_plot_matrix_<variable>.png' and then shown.

    Stocks with fewer than two usable (finite, non-missing) observations for a
    given variable are skipped, leaving that panel empty.

    Returns:
        None. The function only produces figures and PNG files.
    """
    excluded = {'Date', 'ST_Code', 'ST_Name', target_variable}
    # pearsonr and regplot need numeric input, so text columns are left out.
    numeric_columns = data.select_dtypes(include=[np.number]).columns
    variables = [col for col in numeric_columns if col not in excluded]

    # Loop-invariant work: split the frame per stock once, not once per variable.
    unique_stock_codes = data['ST_Code'].unique()
    stock_frames = {code: data[data['ST_Code'] == code] for code in unique_stock_codes}
    n_cols = 5
    n_rows = (len(unique_stock_codes) + n_cols - 1) // n_cols

    for variable in variables:
        fig = plt.figure(figsize=(25, 25))
        for i, stock_code in enumerate(unique_stock_codes):
            stock_data = stock_frames[stock_code]
            # Each stock row already carries the target column, so no merge is needed.
            # Only the two plotted columns are cleaned: a gap in an unrelated
            # column must not remove an otherwise valid observation.
            pair = (
                stock_data[[target_variable, variable]]
                .replace([np.inf, -np.inf], np.nan)
                .dropna()
            )
            # pearsonr needs at least two observations.
            if len(pair) < 2:
                continue
            correlation, p_value = pearsonr(pair[target_variable], pair[variable])
            plt.subplot(n_rows, n_cols, i + 1)
            sns.scatterplot(x=pair[target_variable], y=pair[variable])
            sns.regplot(x=pair[target_variable], y=pair[variable], scatter=False, color='red', ci=95, line_kws={'linestyle': 'dashed'})
            stock_name = stock_data['ST_Name'].iloc[0]  # Get the stock name from the first row
            plt.title(f'{stock_code} ({stock_name})\n(r={correlation:.2f}, p={p_value:.2e})')
            plt.xlabel('TAIEX')
            plt.ylabel(variable)
        plt.suptitle(f'Scatter Plot Matrix for {variable}', fontsize=20)
        # Leave the top 5% of the canvas free for the suptitle.
        plt.tight_layout(rect=[0, 0, 1, 0.95])
        plt.savefig(f'/content/drive/My Drive/scatter_plot_matrix_{variable}.png')
        plt.show()
        # Release the 25x25 inch figure before the next variable is drawn.
        plt.close(fig)
# Run the plotting routine: it draws one figure per variable, saves each one as a
# PNG under '/content/drive/My Drive/', and displays it.
scatter_plot_matrix_by_variable()
import pandas as pd import yfinance as yf import ta from datetime import datetime