Predict
K11.各國非傳染病概覽_Non_HDL

cy.lu

05 Jul 2024 • 2 min read
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from google.colab import drive

# Suppress NumPy's RankWarning (emitted by polynomial fitting when the
# least-squares problem is poorly conditioned) for the whole session.
import warnings

# NumPy 2.0 moved RankWarning into numpy.exceptions and removed the top-level
# alias, so `np.RankWarning` raises AttributeError there. Resolve the class in
# a way that works on both 1.x and 2.x.
try:
    from numpy.exceptions import RankWarning
except ImportError:
    RankWarning = np.RankWarning

warnings.simplefilter('ignore', RankWarning)

# Mount Google Drive so the dataset path below is reachable from Colab.
drive.mount('/content/drive')

# Load the data
file_path = '/content/drive/My Drive/dataset/az_0704.csv'
data = pd.read_csv(file_path)

# Display names for the numeric GDP group codes stored in the 'gdpg1' column.
gdp_group_names = {0: 'Low', 1: 'Lower Middle', 2: 'Middle', 3: 'Upper Middle', 4: 'High'}

# Unique GDP groups and country names, sorted. Missing values are dropped
# first: a NaN among string country names makes sorted() raise TypeError
# (str vs float comparison), and a NaN group code would otherwise leak into
# the group list without matching any rows.
gdp_groups = sorted(data['gdpg1'].dropna().unique())
country_names = sorted(data['Country Name'].dropna().unique())

# Collects every figure produced below so they can be combined afterwards.
individual_plots = []

# Layout and column settings for the per-country scatter plots
# (y_axis_column plotted against 'gdppp2017', grouped by GDP group).
countries_per_row = 5
y_axis_column = 'nonhdl_mgdl'

# NumPy 2.0 moved RankWarning into numpy.exceptions and removed the top-level
# alias, so resolve the class in a version-independent way.
try:
    from numpy.exceptions import RankWarning as rank_warning_cls
except ImportError:
    rank_warning_cls = np.RankWarning

# Column selection shared by every regplot call below.
regplot_columns = {'x': 'gdppp2017', 'y': y_axis_column}

# One figure per row of up to `countries_per_row` countries, GDP group by GDP group.
for gdp_group in gdp_groups:
    subset = data[data['gdpg1'] == gdp_group]

    # Drop missing names so sorted() never compares str with float NaN.
    country_subset_names = sorted(subset['Country Name'].dropna().unique())

    # Fall back to the raw group code when it has no display name instead of
    # aborting the whole run with a KeyError.
    group_label = gdp_group_names.get(gdp_group, gdp_group)

    for i in range(0, len(country_subset_names), countries_per_row):
        row_countries = country_subset_names[i:i + countries_per_row]

        # squeeze=False keeps the axes container indexable even if
        # countries_per_row is set to 1; take the single row as a 1-D array.
        fig, axes = plt.subplots(1, countries_per_row, figsize=(30, 5), squeeze=False)
        axes = axes[0]
        fig.suptitle(f'Scatter Plot for GDP Group: {group_label}', y=1.02)

        for ax, country in zip(axes, row_countries):
            country_subset = subset[subset['Country Name'] == country]

            # Scatter points plus the linear regression line (red).
            sns.regplot(ax=ax, data=country_subset,
                        scatter_kws={'color': 'blue'},
                        line_kws={'color': 'red', 'linewidth': 3},
                        **regplot_columns)

            # Quadratic fit (green). A warning is never raised as an exception
            # unless a filter says so, so RankWarning is promoted to an error
            # just for this call; otherwise the fallback below could never run.
            # seaborn documents that order > 1 is fitted with numpy.polyfit,
            # which is what reports poorly conditioned fits.
            try:
                with warnings.catch_warnings():
                    warnings.simplefilter('error', rank_warning_cls)
                    sns.regplot(ax=ax, data=country_subset, scatter=False, order=2,
                                line_kws={'color': 'green', 'linewidth': 3},
                                **regplot_columns)
            except rank_warning_cls:
                # Poorly conditioned quadratic fit: draw a linear fit in
                # orange instead so the fallback is visible on the plot.
                sns.regplot(ax=ax, data=country_subset, scatter=False, order=1,
                            line_kws={'color': 'orange', 'linewidth': 3},
                            **regplot_columns)

            ax.set_title(f'{country}')
            ax.set_xlabel('GDP')
            ax.set_ylabel(y_axis_column)

        # Hide the unused cells of a partially filled last row.
        for ax in axes[len(row_countries):]:
            ax.axis('off')

        # Leave room at the top for the suptitle.
        fig.tight_layout(rect=[0, 0, 1, 0.95])
        plt.show()

        # Keep the figure so it can be combined into the overview later.
        individual_plots.append(fig)

# Combine all individual plots into one large figure by pasting each
# rendered figure as an image into a grid cell.
total_plots = len(individual_plots)
plots_per_row = 1
# Ceiling division so the grid still fits every plot if plots_per_row changes.
total_rows = -(-total_plots // plots_per_row)

# plt.subplots rejects a zero-row grid, so skip the overview when nothing was plotted.
if total_plots:
    # squeeze=False always yields a 2-D axes array; without it a single plot
    # returns a bare Axes object and indexing it fails.
    fig, axes = plt.subplots(total_rows, plots_per_row,
                             figsize=(15, total_rows * 5), squeeze=False)

    # Turn every cell off up front so cells without an image stay blank.
    for ax in axes.flat:
        ax.axis('off')

    for i, individual_plot in enumerate(individual_plots):
        row, col = divmod(i, plots_per_row)
        # Render explicitly: buffer_rgba() exposes the canvas' last rendering,
        # which is not guaranteed to exist or be current otherwise.
        individual_plot.canvas.draw()
        axes[row][col].imshow(np.asarray(individual_plot.canvas.buffer_rgba()))

    plt.tight_layout()
    plt.show()