Files
Book4_Power-of-Matrix/Book4_Ch24_Python_Codes/Bk4_Ch24_01.py
2022-07-20 22:42:20 -04:00

148 lines
2.5 KiB
Python

###############
# Authored by Weisheng Jiang
# Book 4 | From Basic Arithmetic to Machine Learning
# Published and copyrighted by Tsinghua University Press
# Beijing, China, 2022
###############
# Bk4_Ch24_01_A
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.datasets import load_iris
# Load the iris data set via scikit-learn's load_iris()
iris = load_iris()
X, y = iris.data, iris.target
# One human-readable label per column of X
feature_names = [
    'Sepal length, x1',
    'Sepal width, x2',
    'Petal length, x3',
    'Petal width, x4',
]
# Wrap the raw array in a DataFrame so the columns carry those labels
X_df = pd.DataFrame(data=X, columns=feature_names)
#%% Original data, X
# Pull the values back out of the DataFrame as a plain NumPy array
X = X_df.to_numpy()
#%% Gram matrix, G
# Entry (i, j) of G is the inner product of columns i and j of X
G = X.T @ X
#%% Cosine similarity matrix, C
# C is built by hand from G here. A library alternative would be
# sklearn.metrics.pairwise.cosine_similarity (presumably applied to X.T so
# that columns rather than rows are compared - TODO confirm).
from numpy.linalg import inv
# Scaling matrix: its diagonal holds sqrt(G[j, j]), i.e. the norm of column x_j
S_norm = np.diag(np.sqrt(np.diag(G)))
# Invert the scaling matrix once and apply it on both sides of G
S_norm_inv = inv(S_norm)
C = S_norm_inv @ G @ S_norm_inv
#%% centroid of data matrix, E(X)
# Column means, laid out as a single-row DataFrame
E_X = X_df.mean().to_frame().transpose()
#%% Demean, centralize, X_c
# Subtract each column's mean from that column
X_c = X_df - X_df.mean()
#%% covariance matrix, Sigma
SIGMA = X_df.cov()
#%% correlation matrix, P
RHO = X_df.corr()
#%% Normalize data, Z_X
from scipy.stats import zscore
# NOTE(review): zscore is called with its default ddof (0 per the SciPy docs)
# while DataFrame.cov()/corr() use the sample convention (ddof=1) - confirm
# that this difference in scaling is intended.
Z_X = zscore(X_df)
#%%
# Bk4_Ch24_01_B
#%% QR decomposition
from numpy.linalg import qr
# Factor the data matrix as X = Q @ R using the 'reduced' (economy-size) mode
Q, R = qr(X_df, mode='reduced')
#%%
# Bk4_Ch24_01_C
#%% Cholesky decomposition
from numpy.linalg import cholesky as chol
# Gram matrix: L_G is the factor returned by cholesky, R_G its transpose
L_G = chol(G)
R_G = L_G.transpose()
#%% Cholesky decompose covariance matrix, SIGMA
# Same factorisation applied to the covariance matrix
L_Sigma = chol(SIGMA)
R_Sigma = L_Sigma.transpose()
#%%
# Bk4_Ch24_01_D
#%% Eigen decomposition of the symmetric matrices G, SIGMA and RHO
from numpy.linalg import eig  # kept in case code outside this section uses eig
from numpy.linalg import eigh


def _eig_symmetric_desc(A):
    """Eigen-decompose a real symmetric matrix as A = V @ Lambda @ V.T.

    numpy.linalg.eigh is used instead of numpy.linalg.eig because the
    matrices handled here (Gram, covariance, correlation) are symmetric:
    eigh returns real eigenvalues and orthonormal eigenvectors, whereas
    eig makes no promise about the order of its eigenvalues.

    Parameters
    ----------
    A : array-like, square and symmetric
        Matrix to decompose (a NumPy array or a pandas DataFrame).

    Returns
    -------
    Lambda : numpy.ndarray
        Diagonal matrix with the eigenvalues in descending order.
    V : numpy.ndarray
        Matrix whose j-th column is the eigenvector paired with
        Lambda[j, j].
    """
    eigenvalues, eigenvectors = eigh(np.asarray(A, dtype=float))
    # eigh sorts ascending; flip both outputs together so each eigenvector
    # column stays aligned with its eigenvalue, largest first.
    return np.diag(eigenvalues[::-1]), eigenvectors[:, ::-1]


#%% eigen decompose G
Lambs_G, V_G = _eig_symmetric_desc(G)
#%% eigen decompose Sigma, covariance matrix
Lambs_sigma, V_sigma = _eig_symmetric_desc(SIGMA)
#%% eigen decompose P, correlation matrix
Lambs_P, V_P = _eig_symmetric_desc(RHO)
#%%
# Bk4_Ch24_01_E
#%% SVD, original data X
from numpy.linalg import svd
# Economy-size SVD (full_matrices=False). The singular values come back as
# the vector S_X_ and the third output is transposed to obtain V_X.
U_X, S_X_, V_X = svd(X_df, full_matrices=False)
V_X, S_X = V_X.T, np.diag(S_X_)
# With full_matrices=True, S_X would instead have to be a zero matrix shaped
# like X_df with S_X_ written onto its main diagonal.
#%% SVD, centered data Xc
U_Xc, S_Xc, V_Xc = svd(X_c, full_matrices=False)
# Replace the singular-value vector by its diagonal matrix and transpose V
V_Xc, S_Xc = V_Xc.T, np.diag(S_Xc)
#%% SVD, z scores
U_Z, S_Z, V_Z = svd(Z_X, full_matrices=False)
V_Z, S_Z = V_Z.T, np.diag(S_Z)