From c9e688146dfc752adeed29e8fd88aa8758cf111d Mon Sep 17 00:00:00 2001 From: "Iris Series: Visualize Math -- From Arithmetic Basics to Machine Learning" <105787223+Visualize-ML@users.noreply.github.com> Date: Sat, 1 Feb 2025 17:04:49 +0800 Subject: [PATCH] Delete Book4_Ch22_Python_Codes directory --- Book4_Ch22_Python_Codes/Bk4_Ch22_01.py | 153 ------------------------- 1 file changed, 153 deletions(-) delete mode 100644 Book4_Ch22_Python_Codes/Bk4_Ch22_01.py diff --git a/Book4_Ch22_Python_Codes/Bk4_Ch22_01.py b/Book4_Ch22_Python_Codes/Bk4_Ch22_01.py deleted file mode 100644 index 1fe31d3..0000000 --- a/Book4_Ch22_Python_Codes/Bk4_Ch22_01.py +++ /dev/null @@ -1,153 +0,0 @@ - -############### -# Authored by Weisheng Jiang -# Book 4 | From Basic Arithmetic to Machine Learning -# Published and copyrighted by Tsinghua University Press -# Beijing, China, 2022 -############### - - -# Bk4_Ch22_01_A - -import numpy as np -import matplotlib.pyplot as plt -import pandas as pd -import seaborn as sns -from sklearn.datasets import load_iris - -# Load the iris data -iris_sns = sns.load_dataset("iris") -# A copy from Seaborn -iris = load_iris() -# A copy from Sklearn - -X = iris.data -y = iris.target - -feature_names = ['Sepal length, x1','Sepal width, x2', - 'Petal length, x3','Petal width, x4'] - -# Convert X array to dataframe -X_df = pd.DataFrame(X, columns=feature_names) - -#%% Heatmap of X - -plt.close('all') -sns.set_style("ticks") - -X = X_df.to_numpy(); - -# Visualize the heatmap of X - -fig, ax = plt.subplots() -ax = sns.heatmap(X, - cmap='RdYlBu_r', - xticklabels=list(X_df.columns), - cbar_kws={"orientation": "vertical"}, - vmin=-1, vmax=9) -plt.title('X') - -#%% - -# Bk4_Ch22_01_B - -#%% centroid of data matrix, X -v_1 = np.ones((len(X),1)) - -E_X = v_1.T@X/len(X) -# validate: X.mean(axis = 0) - -#%% Demean, centralize - -X_demean = X_df.sub(X_df.mean()) - - -fig, ax = plt.subplots() -ax = sns.heatmap(X_demean, - cmap='RdYlBu_r', - xticklabels=list(X_df.columns), - cbar_kws={"orientation": "vertical"}, - vmin=-3, vmax=3) -plt.title('$X_{demean}$') - -#%% SSD - -SSD = (np.linalg.norm(X - E_X, axis = 1)**2).sum() -# validate: ((X - E_X)**2).sum() -# use trace: np.trace((X - E_X).T@(X - E_X)) - -#%% - -# Bk4_Ch22_01_C - -# distribution of column features of X - -fig, ax = plt.subplots() -sns.kdeplot(data=X_demean,fill=True, - common_norm=False, - alpha=.3, linewidth=1, - palette = "viridis") -plt.title('Distribution of $X_{demean}$ columns') - -#%% - -# Bk4_Ch22_01_D - -#%% covariance matrix - -SIGMA = X_df.cov() - -fig, axs = plt.subplots() - -h = sns.heatmap(SIGMA,cmap='RdBu_r', linewidths=.05, annot = True) -h.set_aspect("equal") -h.set_title('$\Sigma$') - - -#%% correlation matrix - -RHO = X_df.corr() - -fig, axs = plt.subplots() - -h = sns.heatmap(RHO,cmap='RdBu_r', linewidths=.05, annot = True) -h.set_aspect("equal") -h.set_title('$\u03A1$') - -#%% compare covariance matrices - -f,(ax1,ax2,ax3) = plt.subplots(1,3,sharey=True) - -g1 = sns.heatmap(X_df[y==0].cov(),cmap="RdYlBu_r", - annot=True,cbar=False,ax=ax1,square=True, - vmax = 0.4, vmin = 0) -ax1.set_title('Y = 0, setosa') - -g2 = sns.heatmap(X_df[y==1].cov(),cmap="RdYlBu_r", - annot=True,cbar=False,ax=ax2,square=True, - vmax = 0.4, vmin = 0) -ax2.set_title('Y = 1, versicolor') - -g3 = sns.heatmap(X_df[y==2].cov(),cmap="RdYlBu_r", - annot=True,cbar=False,ax=ax3,square=True, - vmax = 0.4, vmin = 0) -ax3.set_title('Y = 2, virginica') - -#%% compare correlation matrices - -f,(ax1,ax2,ax3) = plt.subplots(1,3,sharey=True) - -g1 = sns.heatmap(X_df[y==0].corr(),cmap="RdYlBu_r", - annot=True,cbar=False,ax=ax1,square=True, - vmax = 1, vmin = 0.15) -ax1.set_title('Y = 0, setosa') - -g2 = sns.heatmap(X_df[y==1].corr(),cmap="RdYlBu_r", - annot=True,cbar=False,ax=ax2,square=True, - vmax = 1, vmin = 0.15) -ax2.set_title('Y = 1, versicolor') - -g3 = sns.heatmap(X_df[y==2].corr(),cmap="RdYlBu_r", - annot=True,cbar=False,ax=ax3,square=True, - vmax = 1, vmin = 0.15) -ax3.set_title('Y = 2, virginica')