# -*- coding: utf-8 -*-
"""
Created on Thu Jul 21 08:36:10 2022

@author: james

Explore the iris data matrix X: heatmaps of X and of the demeaned X, the
centroid and sum of squared deviations, per-column density estimates, and
covariance / correlation matrices for the whole data set and per class label.
"""

# Path taken from the diff header this script was extracted from:
#   Book4_Ch22_Python_Codes/Bk4_Ch22_01.py

# Bk4_Ch22_01_A

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_iris

# Load the iris data
# A copy from Seaborn (not used below; kept for interactive inspection)
iris_sns = sns.load_dataset("iris")
# A copy from Sklearn
iris = load_iris()

X = iris.data
y = iris.target

feature_names = ['Sepal length, x1', 'Sepal width, x2',
                 'Petal length, x3', 'Petal width, x4']

# Convert X array to dataframe
X_df = pd.DataFrame(X, columns=feature_names)

#%% Heatmap of X

plt.close('all')
sns.set_style("ticks")

# Re-derive the array from the dataframe so this cell can be run on its own
X = X_df.to_numpy()

# Visualize the heatmap of X

fig, ax = plt.subplots()
ax = sns.heatmap(X,
                 cmap='RdYlBu_r',
                 xticklabels=list(X_df.columns),
                 cbar_kws={"orientation": "vertical"},
                 vmin=-1, vmax=9)
plt.title('X')

#%%

# Bk4_Ch22_01_B

#%% centroid of data matrix, X

# Column vector of ones, one entry per row of X
v_1 = np.ones((len(X), 1))

# Column means as a single-row matrix: (1^T X) / n
E_X = v_1.T @ X / len(X)
# validate: X.mean(axis = 0)

#%% Demean, centralize

# Subtract each column's mean from that column
X_demean = X_df.sub(X_df.mean())

fig, ax = plt.subplots()
ax = sns.heatmap(X_demean,
                 cmap='RdYlBu_r',
                 xticklabels=list(X_df.columns),
                 cbar_kws={"orientation": "vertical"},
                 vmin=-3, vmax=3)
plt.title('$X_{demean}$')

#%% SSD

# Sum over rows of the squared Euclidean distance to the centroid
SSD = (np.linalg.norm(X - E_X, axis=1)**2).sum()
# validate: ((X - E_X)**2).sum()
# use trace: np.trace((X - E_X).T@(X - E_X))

#%%

# Bk4_Ch22_01_C

# distribution of column features of X

fig, ax = plt.subplots()
sns.kdeplot(data=X_demean, fill=True,
            common_norm=False,
            alpha=.3, linewidth=1,
            palette="viridis")
plt.title('Distribution of $X_{demean}$ columns')

#%%

# Bk4_Ch22_01_D

#%% covariance matrix

SIGMA = X_df.cov()

fig, axs = plt.subplots()

h = sns.heatmap(SIGMA, cmap='RdBu_r', linewidths=.05, annot=True)
h.set_aspect("equal")
# Raw string: '\S' is not a valid Python escape sequence, so a plain literal
# triggers an invalid-escape warning; the raw form passes the same
# backslash-S text through to mathtext.
h.set_title(r'$\Sigma$')


#%% correlation matrix

RHO = X_df.corr()

fig, axs = plt.subplots()

h = sns.heatmap(RHO, cmap='RdBu_r', linewidths=.05, annot=True)
h.set_aspect("equal")
# '\u03A1' is a valid escape (Greek capital rho) and is intentionally not raw
h.set_title('$\u03A1$')

#%% compare covariance matrices

# One panel per class label; all panels share one color scale (0 to 0.4)
f, (ax1, ax2, ax3) = plt.subplots(1, 3, sharey=True)

g1 = sns.heatmap(X_df[y == 0].cov(), cmap="RdYlBu_r",
                 annot=True, cbar=False, ax=ax1, square=True,
                 vmax=0.4, vmin=0)
ax1.set_title('Y = 0, setosa')

g2 = sns.heatmap(X_df[y == 1].cov(), cmap="RdYlBu_r",
                 annot=True, cbar=False, ax=ax2, square=True,
                 vmax=0.4, vmin=0)
ax2.set_title('Y = 1, versicolor')

g3 = sns.heatmap(X_df[y == 2].cov(), cmap="RdYlBu_r",
                 annot=True, cbar=False, ax=ax3, square=True,
                 vmax=0.4, vmin=0)
ax3.set_title('Y = 2, virginica')

#%% compare correlation matrices

# One panel per class label; all panels share one color scale (0.15 to 1)
f, (ax1, ax2, ax3) = plt.subplots(1, 3, sharey=True)

g1 = sns.heatmap(X_df[y == 0].corr(), cmap="RdYlBu_r",
                 annot=True, cbar=False, ax=ax1, square=True,
                 vmax=1, vmin=0.15)
ax1.set_title('Y = 0, setosa')

g2 = sns.heatmap(X_df[y == 1].corr(), cmap="RdYlBu_r",
                 annot=True, cbar=False, ax=ax2, square=True,
                 vmax=1, vmin=0.15)
ax2.set_title('Y = 1, versicolor')

g3 = sns.heatmap(X_df[y == 2].corr(), cmap="RdYlBu_r",
                 annot=True, cbar=False, ax=ax3, square=True,
                 vmax=1, vmin=0.15)
ax3.set_title('Y = 2, virginica')

# NOTE: the diff this script was extracted from also adds a binary file,
#   Book4_Ch22_数据与统计__数学要素__从加减乘除到机器学习.pdf