Add files via upload

2026-05-11 00:49:34 +08:00 · 2022-07-20 22:42:20 -04:00
parent 3758b51116
commit a4d00b28ea
2 changed files with 147 additions and 0 deletions
--- a/Book4_Ch24_Python_Codes/Bk4_Ch24_01.py
+++ b/Book4_Ch24_Python_Codes/Bk4_Ch24_01.py
@@ -0,0 +1,147 @@
+
+###############
+# Authored by Weisheng Jiang
+# Book 4  |  From Basic Arithmetic to Machine Learning
+# Published and copyrighted by Tsinghua University Press
+# Beijing, China, 2022
+###############
+
+# Bk4_Ch24_01_A
+
+import numpy as np
+import matplotlib.pyplot as plt 
+import pandas as pd  
+from sklearn.datasets import load_iris
+
+# Iris data set, loaded from the copy bundled with scikit-learn
+iris = load_iris()
+
+X = iris.data
+y = iris.target
+
+feature_names = ['Sepal length, x1', 'Sepal width, x2',
+                 'Petal length, x3', 'Petal width, x4']
+
+# Wrap the X array in a DataFrame with readable column labels
+X_df = pd.DataFrame(X, columns=feature_names)
+
+#%% Original data, X
+
+X = X_df.to_numpy()
+
+#%% Gram matrix, G
+
+G = X.T @ X
+
+#%% Cosine similarity matrix, C
+
+# from sklearn.metrics.pairwise import cosine_similarity
+# C = cosine_similarity(X.T)  # transpose: the columns of X are the vectors compared
+from numpy.linalg import inv
+
+# scaling matrix, diagonal element is the norm of x_j
+S_norm = np.diag(np.sqrt(np.diag(G)))
+S_norm_inv = inv(S_norm)  # invert once, reuse on both sides of G
+C = S_norm_inv @ G @ S_norm_inv
+
+#%% Centroid of the data matrix, E(X)
+# column means laid out as a single-row frame
+E_X = X_df.mean().to_frame().T
+
+#%% Centered (demeaned) data, X_c
+# subtract each column's mean from that column
+X_c = X_df - X_df.mean()
+
+#%% Covariance matrix, Sigma
+
+SIGMA = X_df.cov()
+
+#%% Correlation matrix, P
+
+RHO = X_df.corr()
+
+#%% Standardized data (z-scores), Z_X
+
+from scipy.stats import zscore
+# NOTE(review): zscore defaults to ddof=0 while DataFrame.cov() defaults to ddof=1 -- confirm intended
+Z_X = zscore(X_df)
+
+#%%
+
+# Bk4_Ch24_01_B
+
+#%% QR decomposition, X = Q @ R
+
+from numpy.linalg import qr
+# 'reduced' mode returns Q of shape (M, K) and R of shape (K, K), K = min(M, N)
+Q, R = qr(X_df, mode='reduced')
+
+#%%
+
+# Bk4_Ch24_01_C
+
+#%% Cholesky decomposition of the Gram matrix, G = L_G @ R_G
+
+from numpy.linalg import cholesky as chol
+
+L_G = chol(G)  # lower-triangular factor
+R_G = L_G.T    # its transpose, the upper-triangular factor
+
+#%% Cholesky decomposition of the covariance matrix, SIGMA
+
+L_Sigma = chol(SIGMA)
+R_Sigma = L_Sigma.T
+
+
+#%%
+
+# Bk4_Ch24_01_D
+
+#%% Eigendecomposition of the Gram matrix, G
+
+from numpy.linalg import eig
+# NOTE: eig does not guarantee any ordering of the eigenvalues (eigh would sort them ascending)
+Lambs_G_, V_G = eig(G)
+Lambs_G = np.diag(Lambs_G_)  # eigenvalues on the diagonal; trailing-underscore name keeps the 1-D vector
+
+#%% Eigendecomposition of Sigma, the covariance matrix
+
+Lambs_sigma_, V_sigma = eig(SIGMA)
+Lambs_sigma = np.diag(Lambs_sigma_)
+
+#%% Eigendecomposition of P, the correlation matrix
+
+Lambs_P_, V_P = eig(RHO)
+Lambs_P = np.diag(Lambs_P_)
+
+#%%
+
+# Bk4_Ch24_01_E
+
+#%% SVD, original data X
+
+from numpy.linalg import svd
+
+# svd returns V already transposed (Vh); transpose back so V_X holds the right singular vectors as columns
+U_X, S_X_, Vh_X = svd(X_df, full_matrices=False)
+V_X = Vh_X.T
+# With full_matrices=True the singular values would sit on a rectangular matrix:
+# indices_diagonal = np.diag_indices(4)
+# S_X = np.zeros_like(X_df)
+# S_X[indices_diagonal] = S_X_
+
+# full_matrices=False: square diagonal matrix of singular values
+S_X = np.diag(S_X_)
+
+#%% SVD, centered data Xc
+
+U_Xc, S_Xc_, Vh_Xc = svd(X_c, full_matrices=False)
+V_Xc = Vh_Xc.T
+S_Xc = np.diag(S_Xc_)
+
+#%% SVD, z scores
+
+U_Z, S_Z_, Vh_Z = svd(Z_X, full_matrices=False)
+V_Z = Vh_Z.T
+S_Z = np.diag(S_Z_)
+