# Mirror of https://github.com/Visualize-ML/Book4_Power-of-Matrix.git
# (synced 2026-02-03 02:24:03 +08:00; 148 lines, 2.5 KiB, Python)

###############
# Authored by Weisheng Jiang
# Book 4 | From Basic Arithmetic to Machine Learning
# Published and copyrighted by Tsinghua University Press
# Beijing, China, 2022
###############

# Bk4_Ch24_01_A

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.datasets import load_iris

# Load the iris dataset through scikit-learn's loader
iris = load_iris()

# Feature matrix and class labels exposed by the loaded dataset object
X, y = iris.data, iris.target

# Column labels used for the DataFrame view of the features
feature_names = [
    'Sepal length, x1',
    'Sepal width, x2',
    'Petal length, x3',
    'Petal width, x4',
]

# Wrap the feature array in a DataFrame with the labels above
X_df = pd.DataFrame(data=X, columns=feature_names)

#%% Original data, X

# Rebind X to the NumPy array form of the DataFrame
X = X_df.to_numpy()

#%% Gram matrix, G

# G = X^T X: inner products between every pair of columns of X
G = X.T @ X

#%% Cosine similarity matrix, C

# from sklearn.metrics.pairwise import cosine_similarity
# C = cosine_similarity(X)
from numpy.linalg import inv

# Scaling matrix: the j-th diagonal element is the norm of column x_j,
# obtained as the square root of the j-th diagonal element of G
S_norm = np.diag(np.sqrt(np.diag(G)))

# C = S^-1 G S^-1; the inverse is evaluated once and applied on both sides
S_norm_inv = inv(S_norm)
C = S_norm_inv @ G @ S_norm_inv

#%% centroid of data matrix, E(X)

# Column means, arranged as a single-row DataFrame
E_X = X_df.mean().to_frame().transpose()

#%% Demean, centralize, X_c

# Subtract each column's mean from that column
X_c = X_df - X_df.mean()

#%% covariance matrix, Sigma

# Pairwise covariance of the columns (pandas defaults)
SIGMA = X_df.cov()

#%% correlation matrix, P

# Pairwise correlation of the columns (pandas defaults)
RHO = X_df.corr()

#%% Normalize data, Z_X

from scipy.stats import zscore

# Standardise column by column; axis=0 and ddof=0 are zscore's documented
# defaults, written out here so the normalisation convention is explicit.
# NOTE(review): pandas cov()/corr() are documented to normalise by n - 1,
# so Z_X uses a different degrees-of-freedom convention -- confirm intended.
Z_X = zscore(X_df, axis=0, ddof=0)

#%%

# Bk4_Ch24_01_B

#%% QR decomposition

from numpy.linalg import qr

# Reduced-mode QR factorisation of the data: X = Q R
Q, R = qr(X_df, mode='reduced')

#%%

# Bk4_Ch24_01_C

#%% Cholesky decomposition

from numpy.linalg import cholesky as chol

# Cholesky factors of the Gram matrix and of the covariance matrix
L_G = chol(G)

#%% Cholesky decompose covariance matrix, SIGMA

L_Sigma = chol(SIGMA)

# Transposes of the two factors
R_G = L_G.T
R_Sigma = L_Sigma.T

#%%

# Bk4_Ch24_01_D

#%% eigen decompose G

from numpy.linalg import eig

# eig returns the eigenvalues first and the eigenvectors second; the
# eigenvalues are then placed on the diagonal of a square matrix
eigvals_G, V_G = eig(G)
Lambs_G = np.diag(eigvals_G)

#%% eigen decompose Sigma, covariance matrix

eigvals_sigma, V_sigma = eig(SIGMA)
Lambs_sigma = np.diag(eigvals_sigma)

#%% eigen decompose P, correlation matrix

eigvals_P, V_P = eig(RHO)
Lambs_P = np.diag(eigvals_P)

#%%

# Bk4_Ch24_01_E

#%% SVD, original data X

from numpy.linalg import svd

# Economy-size SVD; the third output of svd is transposed to obtain V_X
U_X, S_X_, Vh_X = svd(X_df, full_matrices=False)
V_X = Vh_X.T

# full_matrices=True
# indices_diagonal = np.diag_indices(4)
# S_X = np.zeros_like(X_df)
# S_X[indices_diagonal] = S_X_

# full_matrices=False
# Singular values placed on the diagonal of a square matrix
S_X = np.diag(S_X_)

#%% SVD, centred data Xc

U_Xc, s_Xc, Vh_Xc = svd(X_c, full_matrices=False)
V_Xc = Vh_Xc.T
S_Xc = np.diag(s_Xc)

#%% SVD, z scores

U_Z, s_Z, Vh_Z = svd(Z_X, full_matrices=False)
V_Z = Vh_Z.T
S_Z = np.diag(s_Z)