Close_TAIEX -N.Scatter Plot Matrix_0630

Close_TAIEX -N.Scatter Plot Matrix_0630
Photo by Planet Volumes / Unsplash
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr

# Mount Google Drive so the CSV stored under "My Drive" is reachable
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Load the data
file_path = '/content/drive/My Drive/MSCI_Taiwan_30_data.csv'
data = pd.read_csv(file_path)

# Select only numerical columns
numerical_data = data.select_dtypes(include=[np.number])

# Treat +/-inf as missing, then impute every missing value with the mean of
# its own column. DataFrame.replace / DataFrame.fillna do this in one
# vectorised call each instead of a Python-level lambda per column.
# NOTE: mean imputation keeps the row count intact, but the imputed rows
# still take part in the correlations computed from this frame.
numerical_data = numerical_data.replace([np.inf, -np.inf], np.nan)
numerical_data = numerical_data.fillna(numerical_data.mean())

# Pearson correlation (and its p-value) of every numeric column against the
# target variable 'Close_TAIEX'
target_variable = 'Close_TAIEX'
target_series = numerical_data[target_variable]

correlation_results = {}
for name, series in numerical_data.items():
    # The target is not correlated against itself
    if name == target_variable:
        continue
    r_value, p_val = pearsonr(target_series, series)
    correlation_results[name] = {'Pearson Correlation': r_value, 'P-value': p_val}

# One row per variable, strongest positive correlation first
correlation_df = pd.DataFrame.from_dict(correlation_results, orient='index').sort_values(
    by='Pearson Correlation', ascending=False
)

print("Pearson Correlation with Close_TAIEX and corresponding P-values:")
print(correlation_df)

# Plot scatter plots for each explanatory variable vs. Close_TAIEX.
# The grid keeps 6 columns and derives the number of rows from the number of
# variables, so more than 36 variables no longer overflow a fixed 6x6 grid.
# With up to 36 variables the row height is the same as in a 20x20, 6-row figure.
n_plots = len(correlation_df.index)
n_cols = 6
n_rows = max(1, -(-n_plots // n_cols))  # ceiling division, at least one row
fig = plt.figure(figsize=(20, 20 * n_rows / 6))
for i, column in enumerate(correlation_df.index):
    ax = fig.add_subplot(n_rows, n_cols, i + 1)
    sns.scatterplot(x=numerical_data[target_variable], y=numerical_data[column], ax=ax)
    # Annotate each panel with the statistics already computed in correlation_df
    r_value = correlation_df.loc[column, "Pearson Correlation"]
    p_val = correlation_df.loc[column, "P-value"]
    ax.set_title(f'{column} vs. {target_variable}\n(r={r_value:.2f}, p={p_val:.2e})')
    ax.set_xlabel(target_variable)
    ax.set_ylabel(column)
fig.tight_layout()
plt.show()

import pandas as pd import yfinance as yf import ta from datetime import datetime

Mounted at /content/drive
Pearson Correlation with Close_TAIEX and corresponding P-values:
                Pearson Correlation        P-value
RSI21                      0.181235  1.788855e-239
RSI14                      0.149536  7.892582e-163
CMO14                      0.097049   2.915690e-69
RSI7                       0.094208   2.352703e-65
Aroon Up                   0.092640   3.003308e-63
CCI20                      0.085391   5.606595e-54
MACD Line                  0.080482   3.944215e-48
Signal Line                0.079971   1.530058e-47
%D                         0.065723   1.223437e-32
%K                         0.062314   1.670302e-29
WILLR14                    0.062314   1.670302e-29
Market Return              0.053534   3.417693e-22
Adj_Close                  0.040832   1.508200e-13
Lower Band                 0.025166   5.331465e-06
Stock Return               0.024956   6.379107e-06
Low                        0.022923   3.390795e-05
Close_MSCI                 0.022754   3.870897e-05
Open                       0.022309   5.473519e-05
MA7                        0.022306   5.485074e-05
High                       0.022106   6.395461e-05
Middle Band                0.021395   1.092130e-04
MA21                       0.021350   1.128899e-04
MA50                       0.020800   1.688435e-04
MA100                      0.018208   9.919776e-04
Upper Band                 0.018121   1.049533e-03
MACD Histogram             0.012237   2.691003e-02
Volume                    -0.014191   1.028059e-02
Band Width                -0.021713   8.609264e-05
Aroon Down                -0.081898   8.808426e-50
Beta_120                  -0.124150  1.815258e-112
Beta_60                   -0.164698  1.474482e-197

import pandas as pd import yfinance as yf import ta from datetime import datetime

將相關技術指標分為微弱相關、正相關和負相關的表格,包括它們的相關係數(r)和 p 值:

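| 類別 | 指標 | r 值 | p 值 |
| --- | --- | --- | --- |
| 微弱相關 | RSI21 | -0.01 | 7.19e-239 |
| | RSI14 | 0.03 | 7.19e-168 |
| | RSI7 | -0.09 | 2.73e-56 |
| | Aroon Up | 0.03 | 9.03e-63 |
| | Market Return | 0.09 | 2.43e-22 |
| | Adj Close | 0.04 | 1.15e-13 |
| | Lower Band | 0.03 | 5.33e-06 |
| | Stock Return | 0.02 | 3.16e-06 |
| | Low | 0.02 | 3.39e-05 |
| | Close MSCI | 0.02 | 3.78e-05 |
| | Open | 0.02 | 5.47e-05 |
| | MA7 | 0.02 | 4.95e-05 |
| | High | 0.02 | 6.46e-05 |
| | Middle Band | 0.02 | 9.96e-04 |
| | MA21 | 0.02 | 1.34e-04 |
| | MA50 | 0.02 | 1.96e-04 |
| | MA100 | 0.02 | 9.92e-04 |
| | Upper Band | 0.02 | 5.05e-03 |
| | MACD Histogram | 0.01 | 3.10e-02 |
| | Band Width | -0.02 | 8.16e-05 |
| 正相關 | RSI14 | 0.03 | 7.19e-168 |
| | CCI20 | 0.09 | 9.15e-54 |
| | MACD Line | 0.05 | 1.11e-45 |
| | Signal Line | 0.05 | 1.71e-42 |
| | WILLR14 | 0.09 | 1.67e-29 |
| 負相關 | CMO14 | -0.19 | 2.92e-69 |
| | RSI7 | -0.09 | 2.73e-56 |
| | %D | -0.07 | 1.12e-29 |
| | %K | -0.06 | 1.47e-29 |
| | Aroon Down | -0.08 | 8.18e-50 |
| | Beta 120 | -0.12 | 1.82e-112 |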

變數分群的討論

Code

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr

# Mount Google Drive so the CSV stored under "My Drive" is reachable
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Column that every other variable is plotted and correlated against
target_variable = 'Close_TAIEX'

# Load the data and parse the 'Date' column (written as YYYY/MM/DD) to datetime
file_path = '/content/drive/My Drive/MSCI_Taiwan_30_data.csv'
data = pd.read_csv(file_path).assign(
    Date=lambda frame: pd.to_datetime(frame['Date'], format='%Y/%m/%d')
)

# Function to create scatter plot matrix by variable
def scatter_plot_matrix_by_variable():
    """Draw, save and show one grid of per-stock scatter plots per variable.

    Reads the module-level ``data`` frame and ``target_variable`` name. Every
    column other than 'Date', 'ST_Code', 'ST_Name' and the target is treated
    as a variable. For each variable a 5-column grid is drawn with one panel
    per distinct 'ST_Code': the target on the x axis, the variable on the y
    axis, a dashed red regression line with a 95% band, and the Pearson r and
    p-value in the panel title. Each grid is saved to
    '/content/drive/My Drive/scatter_plot_matrix_<variable>.png' and shown.

    Missing and infinite values are removed pairwise: a row is dropped only
    when the target or the variable being plotted is missing, so gaps in
    unrelated columns do not shrink the sample. A stock with fewer than two
    usable rows keeps an empty slot in the grid, because ``pearsonr`` needs
    at least two observations.
    """
    excluded = {'Date', 'ST_Code', 'ST_Name', target_variable}
    variables = [col for col in data.columns if col not in excluded]

    unique_stock_codes = data['ST_Code'].unique()
    n_cols = 5
    n_rows = (len(unique_stock_codes) + n_cols - 1) // n_cols

    # Loop-invariant work done once: turn +/-inf into NaN and split the frame
    # per stock. Each stock's rows already carry the target column, so no
    # merge against a separate target table is needed.
    cleaned = data.replace([np.inf, -np.inf], np.nan)
    rows_by_stock = [cleaned[cleaned['ST_Code'] == code] for code in unique_stock_codes]

    for variable in variables:
        fig = plt.figure(figsize=(25, 25))

        for i, (stock_code, stock_rows) in enumerate(zip(unique_stock_codes, rows_by_stock)):
            # Pairwise deletion: only the two plotted columns must be present
            pair = stock_rows.dropna(subset=[target_variable, variable])
            if len(pair) < 2:
                continue

            correlation, p_value = pearsonr(pair[target_variable], pair[variable])
            ax = fig.add_subplot(n_rows, n_cols, i + 1)
            sns.scatterplot(x=pair[target_variable], y=pair[variable], ax=ax)
            sns.regplot(x=pair[target_variable], y=pair[variable], scatter=False, color='red', ci=95, line_kws={'linestyle': 'dashed'}, ax=ax)
            stock_name = pair['ST_Name'].iloc[0]  # Get the stock name from the first row
            ax.set_title(f'{stock_code} ({stock_name})\n(r={correlation:.2f}, p={p_value:.2e})')
            ax.set_xlabel('TAIEX')
            ax.set_ylabel(variable)

        fig.suptitle(f'Scatter Plot Matrix for {variable}', fontsize=20)
        # Leave the top 5% of the figure free for the suptitle
        fig.tight_layout(rect=[0, 0, 1, 0.95])
        fig.savefig(f'/content/drive/My Drive/scatter_plot_matrix_{variable}.png')
        plt.show()
        # Release the figure so one open figure per variable does not pile up
        plt.close(fig)

# Generate, save and display one scatter plot matrix per variable
scatter_plot_matrix_by_variable()

import pandas as pd import yfinance as yf import ta from datetime import datetime