mirror of
https://github.com/Visualize-ML/Book4_Power-of-Matrix.git
synced 2026-02-02 18:21:08 +08:00
Add files via upload
This commit is contained in:
committed by
GitHub
parent
79be5dda7d
commit
5adb9e44a7
190
Book4_Ch11_Python_Codes/Bk4_Ch11_01.ipynb
Normal file
190
Book4_Ch11_Python_Codes/Bk4_Ch11_01.ipynb
Normal file
File diff suppressed because one or more lines are too long
374
Book4_Ch11_Python_Codes/Bk4_Ch11_02.ipynb
Normal file
374
Book4_Ch11_Python_Codes/Bk4_Ch11_02.ipynb
Normal file
File diff suppressed because one or more lines are too long
367
Book4_Ch12_Python_Codes/Bk4_Ch12_01.ipynb
Normal file
367
Book4_Ch12_Python_Codes/Bk4_Ch12_01.ipynb
Normal file
File diff suppressed because one or more lines are too long
331
Book4_Ch13_Python_Codes/Bk4_Ch13_01.ipynb
Normal file
331
Book4_Ch13_Python_Codes/Bk4_Ch13_01.ipynb
Normal file
File diff suppressed because one or more lines are too long
344
Book4_Ch13_Python_Codes/Bk4_Ch13_02.ipynb
Normal file
344
Book4_Ch13_Python_Codes/Bk4_Ch13_02.ipynb
Normal file
File diff suppressed because one or more lines are too long
308
Book4_Ch13_Python_Codes/Bk4_Ch13_03.ipynb
Normal file
308
Book4_Ch13_Python_Codes/Bk4_Ch13_03.ipynb
Normal file
File diff suppressed because one or more lines are too long
90
Book4_Ch13_Python_Codes/Streamlit_Bk4_Ch13_04.py
Normal file
90
Book4_Ch13_Python_Codes/Streamlit_Bk4_Ch13_04.py
Normal file
@@ -0,0 +1,90 @@
|
||||
|
||||
###############
|
||||
# Authored by Weisheng Jiang
|
||||
# Book 4 | From Basic Arithmetic to Machine Learning
|
||||
# Published and copyrighted by Tsinghua University Press
|
||||
# Beijing, China, 2022
|
||||
###############
|
||||
|
||||
import streamlit as st # 导入 Streamlit 库,用于创建交互式 Web 应用
|
||||
import numpy as np # 导入 NumPy 库,用于数值计算
|
||||
import plotly.express as px # 导入 Plotly Express 库,用于绘制交互式图表
|
||||
import pandas as pd # 导入 Pandas 库,用于数据处理
|
||||
|
||||
# 定义函数 bmatrix,用于将 NumPy 数组转化为 LaTeX 矩阵格式
|
||||
def bmatrix(a):
    r"""Return a LaTeX ``bmatrix`` representation of a NumPy array.

    Parameters
    ----------
    a : numpy.ndarray
        Array with at most two dimensions.

    Returns
    -------
    str
        LaTeX source. Every row of ``a`` becomes one matrix row whose
        entries are joined with ``&`` and which is terminated by ``\\``.

    Raises
    ------
    ValueError
        If ``a`` has more than two dimensions.
    """
    if len(a.shape) > 2:
        raise ValueError('bmatrix 函数最多显示二维矩阵')

    arr = np.asarray(a)
    # Plain str(a) wraps long rows at NumPy's default line width and
    # abbreviates big arrays with "...". Either would corrupt the matrix,
    # so render with unlimited width and no summarisation.
    text = np.array2string(arr, max_line_width=np.inf,
                           threshold=arr.size + 1)
    rows = text.replace('[', '').replace(']', '').splitlines()

    rv = [r'\begin{bmatrix}']
    rv += [' ' + ' & '.join(row.split()) + r'\\' for row in rows]
    rv += [r'\end{bmatrix}']
    return '\n'.join(rv)
|
||||
|
||||
# 在侧边栏创建交互式滑块,用户可调整矩阵 A 的元素
|
||||
# Sidebar: show the symbolic form of A and collect its four entries.
with st.sidebar:
    st.latex(r'''
A = \begin{bmatrix}
a & b\\
c & d
\end{bmatrix}''')

    # Each entry ranges over [-2, 2] in steps of 0.1; the defaults make A
    # the identity matrix.
    a = st.slider('a', min_value=-2.0, max_value=2.0, value=1.0, step=0.1)
    b = st.slider('b', min_value=-2.0, max_value=2.0, value=0.0, step=0.1)
    c = st.slider('c', min_value=-2.0, max_value=2.0, value=0.0, step=0.1)
    d = st.slider('d', min_value=-2.0, max_value=2.0, value=1.0, step=0.1)

#%% Regular 11 x 11 lattice of points on [-1, 1] x [-1, 1]
axis_ticks = np.linspace(-1, 1, 11)
xx1, xx2 = np.meshgrid(axis_ticks, axis_ticks)
grid_points = np.column_stack((xx1.ravel(), xx2.ravel()))  # one point per row

# Matrix assembled from the slider values.
A = np.array([[a, b],
              [c, d]])

# Points are stored as rows, so the map is applied from the right.
mapped_points = grid_points @ A

#%% Attach a colour value in [0, 1] to every point (in lattice order)
df = pd.DataFrame({
    'z1': mapped_points[:, 0],
    'z2': mapped_points[:, 1],
    'color': np.linspace(0, 1, len(mapped_points)),
})

#%% Scatter plot of the transformed lattice
st.latex('A = ' + bmatrix(A))

fig = px.scatter(df,
                 x="z1",
                 y="z2",
                 color='color',
                 color_continuous_scale='rainbow')

# Fixed 500 x 500 canvas.
fig.update_layout(autosize=False, width=500, height=500)

# Black reference lines through the origin.
fig.add_hline(y=0, line_color='black')
fig.add_vline(x=0, line_color='black')

# Fixed axis window so the effect of A is comparable between reruns.
fig.update_xaxes(range=[-3, 3])
fig.update_yaxes(range=[-3, 3])

# The colour only encodes point order, so the colour bar is hidden.
fig.update_coloraxes(showscale=False)

st.plotly_chart(fig)
|
||||
|
||||
|
||||
|
||||
|
||||
242
Book4_Ch14_Python_Codes/Bk4_Ch14_01.ipynb
Normal file
242
Book4_Ch14_Python_Codes/Bk4_Ch14_01.ipynb
Normal file
@@ -0,0 +1,242 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "73bd968b-d970-4a05-94ef-4e7abf990827",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Chapter 14\n",
|
||||
"\n",
|
||||
"# 矩阵平方根\n",
|
||||
"Book_4《矩阵力量》 | 鸢尾花书:从加减乘除到机器学习 (第二版)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c263fc95-881a-49fb-9c6c-a25508996623",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"该代码的主要任务是通过矩阵分解和重构方法,基于给定的矩阵$A$,验证矩阵重构的正确性。具体流程如下:\n",
|
||||
"\n",
|
||||
"1. **定义矩阵$A$**: \n",
|
||||
" 代码首先创建了一个$2 \\times 2$矩阵 $A = \\begin{bmatrix} 1.25 & -0.75 \\\\ -0.75 & 1.25 \\end{bmatrix}$。矩阵 $A$ 是一个对称矩阵,因此可以进行特征值分解。\n",
|
||||
"\n",
|
||||
"2. **计算特征值和特征向量**: \n",
|
||||
" 代码使用 `np.linalg.eig` 函数对矩阵 $A$ 进行特征值分解,计算出$A$的特征值(存储在 $LAMBDA$ 中)和特征向量(存储在 $V$ 中),满足以下分解公式:\n",
|
||||
" $$\n",
|
||||
" A = V \\Lambda V^{-1}\n",
|
||||
" $$\n",
|
||||
" 其中,$\\Lambda$ 是一个包含特征值的对角矩阵,$V$ 是由特征向量组成的矩阵。\n",
|
||||
"\n",
|
||||
"3. **构建矩阵$B$**: \n",
|
||||
" 接下来,代码构建了一个新的矩阵 $B$。它的计算公式为:\n",
|
||||
" $$\n",
|
||||
" B = V \\sqrt{\\Lambda} V^{-1}\n",
|
||||
" $$\n",
|
||||
" 其中,$\\sqrt{\\Lambda}$ 是对角矩阵,其对角元素是 $\\Lambda$ 的平方根。也就是说,$B$ 是通过将 $A$ 的特征值取平方根后重新组合得到的矩阵。\n",
|
||||
"\n",
|
||||
"4. **重构矩阵$A$并验证结果**: \n",
|
||||
" 最后,通过矩阵 $B$ 构造了一个新矩阵 $A_{reproduced}$,其计算公式为:\n",
|
||||
" $$\n",
|
||||
" A_{reproduced} = B B^T\n",
|
||||
" $$\n",
|
||||
" 这是利用矩阵 $B$ 重构 $A$ 的过程。对称矩阵 $A$ 的特征值平方根分解使得 $B B^T$ 应等于原始矩阵 $A$。因此,通过打印 $A_{reproduced}$,可以验证 $B B^T$ 是否等于 $A$,从而确认重构的正确性。"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "2759881c-9e2a-4e4d-a79c-1617f2a4be4f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np # 导入NumPy库"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a685835a-bcda-40f8-ac57-ff3738c0209f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 初始化矩阵A"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "a6e9a15c-1d77-4a95-a13c-1c825598e8be",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"A = np.matrix([[1.25, -0.75], # 定义矩阵A\n",
|
||||
" [-0.75, 1.25]]) # 矩阵的元素"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "55297e89-757e-4136-a0af-4763d1db3e56",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 计算特征值和特征向量"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "998fa920-5e90-408f-8b70-dd3633c870e9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"LAMBDA, V = np.linalg.eig(A) # 计算矩阵A的特征值(LAMBDA)和特征向量(V)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "ce8e4f8c-19a0-4392-b1ef-e794dcc5f187",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([2. , 0.5])"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"LAMBDA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "13196da8-54c0-44bb-ac29-6cf7c6fec408",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"matrix([[ 0.70710678, 0.70710678],\n",
|
||||
" [-0.70710678, 0.70710678]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"V"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c6bb1a72-57de-4ae9-a2e2-599db3122538",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 构建矩阵B"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "cb92d4fe-8443-473f-a61c-378630468024",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"B = V @ np.diag(np.sqrt(LAMBDA)) @ np.linalg.inv(V) # 根据特征值和特征向量构建矩阵B"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "da0f2576-d262-41cd-8324-208cf3df1c82",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"matrix([[ 1.06066017, -0.35355339],\n",
|
||||
" [-0.35355339, 1.06066017]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"B"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d70a4893-0524-47aa-91ad-4ad9863a5c89",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 重构矩阵A并打印"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "d07c6472-787a-44c2-9d20-99e4d070826a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[[ 1.25 -0.75]\n",
|
||||
" [-0.75 1.25]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"A_reproduced = B @ B.T # 通过矩阵B的转置乘积重构矩阵A\n",
|
||||
"print(A_reproduced) # 输出重构后的矩阵A"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "85a80909-2aac-49ed-bb7a-f8cc6b80ee7d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ecd322f4-f919-4be2-adc3-69d28ef25e69",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
263
Book4_Ch14_Python_Codes/Bk4_Ch14_02.ipynb
Normal file
263
Book4_Ch14_Python_Codes/Bk4_Ch14_02.ipynb
Normal file
File diff suppressed because one or more lines are too long
54
Book4_Ch14_Python_Codes/Streamlit_Bk4_Ch14_02.py
Normal file
54
Book4_Ch14_Python_Codes/Streamlit_Bk4_Ch14_02.py
Normal file
@@ -0,0 +1,54 @@
|
||||
|
||||
###############
|
||||
# Authored by Weisheng Jiang
|
||||
# Book 4 | From Basic Arithmetic to Machine Learning
|
||||
# Published and copyrighted by Tsinghua University Press
|
||||
# Beijing, China, 2025
|
||||
###############
|
||||
|
||||
import numpy as np # 导入NumPy库,用于数值计算
|
||||
import streamlit as st # 导入Streamlit库,用于创建交互式Web应用
|
||||
import time # 导入time模块,用于添加延时效果
|
||||
|
||||
# 定义状态转移矩阵 A
|
||||
# State-transition matrix A. It is column-stochastic (each column sums to 1)
# and is applied below as ``pi @ A.T``:
#   column 1: chicken -> chicken 0.7, chicken -> rabbit 0.3
#   column 2: rabbit  -> chicken 0.2, rabbit  -> rabbit 0.8
# np.matrix is kept on purpose: products with it stay two-dimensional, which
# the loop relies on when it indexes ``last_rows[-1, :]``.
A = np.matrix([[0.7, 0.2],
               [0.3, 0.8]])

# Sidebar controls.
with st.sidebar:
    # Initial share of chickens; the rabbit share is the complement.
    pi_0_chicken = st.slider('Ratio of chicken:',
                             0.0, 1.0, step=0.1)
    pi_0_rabbit = 1 - pi_0_chicken
    st.write('Ratio of rabbit: ' + str(round(pi_0_rabbit, 1)))

    # Number of simulated nights.
    num_iterations = st.slider('Number of nights:',
                               20, 100, step=5)

# Progress bar and status text shown in the sidebar.
progress_bar = st.sidebar.progress(0)
status_text = st.sidebar.empty()

# Initial state as a 1 x 2 row vector: [chicken, rabbit].
last_rows = np.array([[pi_0_chicken, pi_0_rabbit]])

# Line chart that is extended by one row per simulated night.
chart = st.line_chart(last_rows)

for i in range(1, num_iterations):
    last_status = last_rows[-1, :]        # current state
    new_rows = last_status @ A.T          # next state (1 x 2 matrix)
    percent = (i + 1) * 100 / num_iterations

    status_text.text("%i%% Complete" % percent)
    chart.add_rows(new_rows)
    # Drive the bar with the same percentage that is printed. Passing the
    # raw loop index left the bar short of 100 % (e.g. 19 % for 20 nights).
    progress_bar.progress(int(percent))
    last_rows = new_rows
    time.sleep(0.1)                       # slow down so updates are visible

# Remove the progress bar once the simulation has finished.
progress_bar.empty()
|
||||
|
||||
119
Book4_Ch14_Python_Codes/Streamlit_Bk4_Ch14_03.py
Normal file
119
Book4_Ch14_Python_Codes/Streamlit_Bk4_Ch14_03.py
Normal file
@@ -0,0 +1,119 @@
|
||||
|
||||
###############
|
||||
# Authored by Weisheng Jiang
|
||||
# Book 4 | From Basic Arithmetic to Machine Learning
|
||||
# Published and copyrighted by Tsinghua University Press
|
||||
# Beijing, China, 2025
|
||||
###############
|
||||
|
||||
|
||||
import plotly.graph_objects as go # 导入 Plotly 的图形对象模块,用于创建复杂的图形
|
||||
import streamlit as st # 导入 Streamlit 库,用于创建交互式 Web 应用
|
||||
import numpy as np # 导入 NumPy,用于数值计算
|
||||
import plotly.express as px # 导入 Plotly Express,用于快速绘制图表
|
||||
import pandas as pd # 导入 Pandas,用于数据处理
|
||||
import sympy # 导入 SymPy,用于符号运算和公式化表达
|
||||
|
||||
# 定义函数 bmatrix,将 NumPy 数组转换为 LaTeX 格式的矩阵表示
|
||||
def bmatrix(a):
    r"""Return a LaTeX ``bmatrix`` representation of a NumPy array.

    Parameters
    ----------
    a : numpy.ndarray
        Array with at most two dimensions.

    Returns
    -------
    str
        LaTeX source. Every row of ``a`` becomes one matrix row whose
        entries are joined with ``&`` and which is terminated by ``\\``.

    Raises
    ------
    ValueError
        If ``a`` has more than two dimensions.
    """
    if len(a.shape) > 2:
        raise ValueError('bmatrix 函数最多显示二维矩阵')

    arr = np.asarray(a)
    # Plain str(a) wraps long rows at NumPy's default line width and
    # abbreviates big arrays with "...". Either would corrupt the matrix,
    # so render with unlimited width and no summarisation.
    text = np.array2string(arr, max_line_width=np.inf,
                           threshold=arr.size + 1)
    rows = text.replace('[', '').replace(']', '').splitlines()

    rv = [r'\begin{bmatrix}']
    rv += [' ' + ' & '.join(row.split()) + r'\\' for row in rows]
    rv += [r'\end{bmatrix}']
    return '\n'.join(rv)
|
||||
|
||||
# 创建 Streamlit 侧边栏,用于调整矩阵 A 的参数
|
||||
# Sidebar: show the symmetric template of A and collect a, b and c.
with st.sidebar:
    st.latex(r'''
A = \begin{bmatrix}
a & b\\
b & c
\end{bmatrix}''')

    a = st.slider('a', -2.0, 2.0, step=0.05, value=1.0)
    b = st.slider('b', -2.0, 2.0, step=0.05, value=0.0)
    c = st.slider('c', -2.0, 2.0, step=0.05, value=1.0)

#%% 36 points on the unit circle, one point per row
theta_array = np.linspace(0, 2 * np.pi, 36)
X = np.column_stack((np.cos(theta_array),
                     np.sin(theta_array)))

# Symmetric matrix built from the slider values.
A = np.array([[a, b],
              [b, c]])

# Equation of the unit circle and of the linear map.
st.latex(r'''z^Tz = 1''')
st.latex(r'''x = Az''')

st.latex('A =' + bmatrix(A))

# Points are rows, so the map is applied from the right. A is symmetric,
# hence this agrees with x = Az applied to every point.
X_ = X @ A

#%% Symbolic form of the ellipse
x1, x2 = sympy.symbols('x1 x2')
y1, y2 = sympy.symbols('y1 y2')
x = np.array([[x1, x2]]).T
y = np.array([[y1, y2]]).T

# Q = (A A^T)^(-1) only exists for invertible A. The sliders can produce a
# singular A (for example a = b = c = 1), so report that instead of letting
# the script fail with a traceback.
try:
    Q = np.linalg.inv(A @ A.T)
except np.linalg.LinAlgError:
    st.error('A is singular, so (A A^T)^(-1) does not exist. '
             'Adjust a, b or c.')
    st.stop()

D, V = np.linalg.eig(Q)   # eigenvalues and eigenvectors of Q
D = np.diag(D)            # eigenvalues as a diagonal matrix

# Show Q and its eigen-decomposition (values rounded to 3 decimals).
st.latex(r'Q = \left( AA^T\right)^{-1} = ' + bmatrix(np.round(Q, 3)))
st.latex(r'''Q = V \Lambda V^{T}''')
st.latex(bmatrix(np.around(Q, decimals=3)) + '=' +
         bmatrix(np.around(V, decimals=3)) + '@' +
         bmatrix(np.around(D, decimals=3)) + '@' +
         bmatrix(np.around(V.T, decimals=3)))

# Quadratic forms: x^T Q x in the original coordinates and y^T D y in the
# eigenvector coordinates.
f_x = x.T @ np.round(Q, 3) @ x
f_y = y.T @ np.round(D, 3) @ y

# Qualified sympy calls replace the former mid-file ``from sympy import *``,
# which silently rebound unrelated module-level names.
st.write('The formula of the ellipse:')
st.latex(sympy.latex(sympy.simplify(f_x[0][0])) + ' = 1')
st.write('The formula of the transformed ellipse:')
st.latex(sympy.latex(sympy.simplify(f_y[0][0])) + ' = 1')

#%% Attach a colour value in [0, 1] to every transformed point
color_array = np.linspace(0, 1, len(X))
X_c = np.column_stack((X_, color_array))
df = pd.DataFrame(X_c, columns=['x1', 'x2', 'color'])

#%% Scatter plot of the transformed circle
fig = px.scatter(df,
                 x="x1",
                 y="x2",
                 color='color',
                 color_continuous_scale=px.colors.sequential.Rainbow)

# Fixed 500 x 500 canvas.
fig.update_layout(
    autosize=False,
    width=500,
    height=500)

# Black reference lines through the origin; colour bar hidden.
fig.add_hline(y=0, line_color='black')
fig.add_vline(x=0, line_color='black')
fig.update_layout(coloraxis_showscale=False)
fig.update_xaxes(range=[-3, 3])
fig.update_yaxes(range=[-3, 3])

st.plotly_chart(fig)
|
||||
|
||||
|
||||
458
Book4_Ch15_Python_Codes/Bk4_Ch15_01.ipynb
Normal file
458
Book4_Ch15_Python_Codes/Bk4_Ch15_01.ipynb
Normal file
File diff suppressed because one or more lines are too long
604
Book4_Ch15_Python_Codes/Bk4_Ch15_02.ipynb
Normal file
604
Book4_Ch15_Python_Codes/Bk4_Ch15_02.ipynb
Normal file
File diff suppressed because one or more lines are too long
876
Book4_Ch16_Python_Codes/Bk4_Ch16_01.ipynb
Normal file
876
Book4_Ch16_Python_Codes/Bk4_Ch16_01.ipynb
Normal file
File diff suppressed because one or more lines are too long
99
Book4_Ch16_Python_Codes/Streamlit_Bk4_Ch16_01.py
Normal file
99
Book4_Ch16_Python_Codes/Streamlit_Bk4_Ch16_01.py
Normal file
@@ -0,0 +1,99 @@
|
||||
###############
|
||||
# Authored by Weisheng Jiang
|
||||
# Book 4 | From Basic Arithmetic to Machine Learning
|
||||
# Published and copyrighted by Tsinghua University Press
|
||||
# Beijing, China, 2025
|
||||
###############
|
||||
|
||||
import streamlit as st # 导入 Streamlit 库,用于创建交互式 Web 应用
|
||||
import plotly.express as px # 导入 Plotly Express,用于绘制交互式图表
|
||||
|
||||
import numpy as np # 导入 NumPy,用于数值计算
|
||||
import pandas as pd # 导入 Pandas,用于数据处理
|
||||
from sklearn.datasets import load_iris # 从 scikit-learn 导入 Iris 数据集
|
||||
|
||||
#%%
|
||||
|
||||
# 加载 Iris 数据集
|
||||
# Load the Iris data set.
iris = load_iris()
X = iris.data      # feature matrix
y = iris.target    # class labels

# Column labels used for the data frames and the heat-map axes.
feature_names = ['Sepal length, x1', 'Sepal width, x2',
                 'Petal length, x3', 'Petal width, x4']

X_df = pd.DataFrame(X, columns=feature_names)

# Sidebar: SVD formulas. Raw strings keep the LaTeX backslashes intact;
# '\s' in a normal literal is an invalid escape sequence.
with st.sidebar:
    st.latex('X = USV^T')
    st.latex(r'X = \sum_{j=1}^{D} s_j u_j v_j^T')
    st.latex(r'X \simeq \sum_{j=1}^{p} s_j u_j v_j^T')

#%% Original data X

X = X_df.to_numpy()

# Economy-size singular value decomposition.
U, S, V_T = np.linalg.svd(X, full_matrices=False)
S = np.diag(S)   # singular values as a diagonal matrix
V = V_T.T        # right singular vectors as columns

# Sidebar: number of components p used for the approximation.
with st.sidebar:
    p = st.slider('Choose p, number of component to approximate X:',
                  1, 4, step=1)

#%% Rank-p approximation of X

X_apprx = U[:, 0:p] @ S[0:p, 0:p] @ V[:, 0:p].T
X_apprx_df = pd.DataFrame(X_apprx, columns=feature_names)

# Residual between the data and its approximation.
Error_df = X_df - X_apprx_df

#%% Heat maps of X, the approximation and the residual


def _heatmap(frame):
    """Return a 300 x 500 heat map of *frame* on the shared colour range."""
    fig = px.imshow(frame,
                    color_continuous_scale='RdYlBu_r',
                    range_color=[0, 8])
    fig.layout.height = 500
    fig.layout.width = 300
    fig.update_layout(coloraxis_showscale=False)   # hide the colour bar
    return fig


col1, col2, col3 = st.columns(3)

# Column 1: original matrix.
with col1:
    st.latex('X')
    fig_1 = _heatmap(X_df)
    st.plotly_chart(fig_1)

# Column 2: rank-p approximation.
with col2:
    st.latex(r'\hat{X}')
    fig_2 = _heatmap(X_apprx_df)
    st.plotly_chart(fig_2)

# Column 3: residual.
with col3:
    st.latex(r'X - \hat{X}')
    fig_3 = _heatmap(Error_df)
    st.plotly_chart(fig_3)
|
||||
|
||||
|
||||
348
Book4_Ch17_Python_Codes/Bk4_Ch17_01.ipynb
Normal file
348
Book4_Ch17_Python_Codes/Bk4_Ch17_01.ipynb
Normal file
File diff suppressed because one or more lines are too long
432
Book4_Ch17_Python_Codes/Bk4_Ch17_02.ipynb
Normal file
432
Book4_Ch17_Python_Codes/Bk4_Ch17_02.ipynb
Normal file
File diff suppressed because one or more lines are too long
91
Book4_Ch17_Python_Codes/Streamlit_Bk4_Ch17_01.py
Normal file
91
Book4_Ch17_Python_Codes/Streamlit_Bk4_Ch17_01.py
Normal file
@@ -0,0 +1,91 @@
|
||||
|
||||
###############
|
||||
# Authored by Weisheng Jiang
|
||||
# Book 4 | From Basic Arithmetic to Machine Learning
|
||||
# Published and copyrighted by Tsinghua University Press
|
||||
# Beijing, China, 2025
|
||||
###############
|
||||
|
||||
import sympy # 导入 sympy,用于符号计算
|
||||
import numpy as np # 导入 NumPy,用于数值计算
|
||||
from sympy.functions import exp # 从 sympy 导入指数函数
|
||||
import streamlit as st # 导入 Streamlit,用于创建交互式 Web 应用
|
||||
import plotly.figure_factory as ff # 导入 Plotly 工厂方法,用于可视化
|
||||
import plotly.graph_objects as go # 导入 Plotly 图形对象模块,用于复杂图形
|
||||
|
||||
# 定义符号变量和函数
|
||||
# Symbolic variables and the function f(x1, x2) = x1 * exp(-(x1^2 + x2^2)).
x1, x2 = sympy.symbols('x1 x2')
f_x = x1 * exp(-(x1**2 + x2**2))

st.latex('f(x_1, x_2) = ' + sympy.latex(f_x))

# Gradient: partial derivatives with respect to x1 and x2.
grad_f = [sympy.diff(f_x, var) for var in (x1, x2)]
st.latex(r'\nabla f = ' + sympy.latex(grad_f) + '^T')

# Numeric versions of f and of its gradient.
f_x_fcn = sympy.lambdify([x1, x2], f_x)
grad_fcn = sympy.lambdify([x1, x2], grad_f)

# Axis samples on [-2, 2].
x1_array = np.linspace(-2, 2, 100)
x2_array = np.linspace(-2, 2, 100)

# Fine grid for the surface and the contours.
xx1, xx2 = np.meshgrid(x1_array, x2_array)
# Coarse 20 x 20 grid for the gradient arrows.
xx1_, xx2_ = np.meshgrid(np.linspace(-2, 2, 20), np.linspace(-2, 2, 20))

V = grad_fcn(xx1_, xx2_)     # gradient components on the coarse grid
ff_x = f_x_fcn(xx1, xx2)     # function values on the fine grid

#%% Visualisation

# 3-D surface of f.
fig_surface = go.Figure(go.Surface(
    x=x1_array,
    y=x2_array,
    z=ff_x,
    showscale=False))
fig_surface.update_layout(
    autosize=False,
    width=800,
    height=800)

st.plotly_chart(fig_surface)

# Gradient field (quiver) over a contour plot of f.
f = ff.create_quiver(xx1_, xx2_, V[0], V[1])
trace1 = f.data[0]
trace2 = go.Contour(
    x=x1_array,
    y=x2_array,
    z=ff_x,
    showscale=False)

data = [trace1, trace2]
# A plain Figure is enough for st.plotly_chart. FigureWidget is the Jupyter
# widget variant and needs notebook widget packages at runtime.
fig = go.Figure(data)
fig.update_layout(
    autosize=False,
    width=800,
    height=800)

# Black reference lines through the origin.
fig.add_hline(y=0, line_color='black')
fig.add_vline(x=0, line_color='black')

# Axis window matching the sampled domain.
fig.update_xaxes(range=[-2, 2])
fig.update_yaxes(range=[-2, 2])
fig.update_coloraxes(showscale=False)

st.plotly_chart(fig)
|
||||
|
||||
|
||||
|
||||
329
Book4_Ch19_Python_Codes/Bk4_Ch19_01.ipynb
Normal file
329
Book4_Ch19_Python_Codes/Bk4_Ch19_01.ipynb
Normal file
File diff suppressed because one or more lines are too long
199
Book4_Ch20_Python_Codes/Bk4_Ch20_01.ipynb
Normal file
199
Book4_Ch20_Python_Codes/Bk4_Ch20_01.ipynb
Normal file
File diff suppressed because one or more lines are too long
137
Book4_Ch20_Python_Codes/Streamlit_Bk4_Ch20_02.py
Normal file
137
Book4_Ch20_Python_Codes/Streamlit_Bk4_Ch20_02.py
Normal file
@@ -0,0 +1,137 @@
|
||||
|
||||
###############
|
||||
# Authored by Weisheng Jiang
|
||||
# Book 4 | From Basic Arithmetic to Machine Learning
|
||||
# Published and copyrighted by Tsinghua University Press
|
||||
# Beijing, China, 2025
|
||||
###############
|
||||
|
||||
import streamlit as st # 导入 Streamlit,用于创建交互式 Web 应用
|
||||
import plotly.graph_objects as go # 导入 Plotly 图形对象,用于绘图
|
||||
import sympy # 导入 SymPy,用于符号运算
|
||||
import numpy as np # 导入 NumPy,用于数值计算
|
||||
from scipy.stats import multivariate_normal # 从 SciPy 导入多元正态分布
|
||||
|
||||
# 定义一个函数,将 NumPy 数组转换为 LaTeX bmatrix 格式
|
||||
def bmatrix(a):
    r"""Return a LaTeX ``bmatrix`` representation of a NumPy array.

    Parameters
    ----------
    a : numpy.ndarray
        Array with at most two dimensions.

    Returns
    -------
    str
        LaTeX source. Every row of ``a`` becomes one matrix row whose
        entries are joined with ``&`` and which is terminated by ``\\``.

    Raises
    ------
    ValueError
        If ``a`` has more than two dimensions.
    """
    if len(a.shape) > 2:
        raise ValueError('bmatrix 函数最多支持二维矩阵')

    arr = np.asarray(a)
    # Plain str(a) wraps long rows at NumPy's default line width and
    # abbreviates big arrays with "...". Either would corrupt the matrix,
    # so render with unlimited width and no summarisation.
    text = np.array2string(arr, max_line_width=np.inf,
                           threshold=arr.size + 1)
    rows = text.replace('[', '').replace(']', '').splitlines()

    rv = [r'\begin{bmatrix}']
    rv += [' ' + ' & '.join(row.split()) + r'\\' for row in rows]
    rv += [r'\end{bmatrix}']
    return '\n'.join(rv)
|
||||
|
||||
# 在侧边栏中创建滑块,用于调整协方差矩阵的参数
|
||||
# Sidebar: show the symbolic covariance matrix and collect its parameters.
with st.sidebar:
    st.latex(r'''
\Sigma = \begin{bmatrix}
\sigma_1^2 &
\rho \sigma_1 \sigma_2 \\
\rho \sigma_1 \sigma_2 &
\sigma_2^2
\end{bmatrix}''')

    # Raw strings keep the LaTeX backslashes intact; '\s' in a normal
    # literal is an invalid escape sequence.
    st.write(r'$\sigma_1$')
    sigma_1 = st.slider('sigma_1', 1.0, 2.0, step=0.1)
    st.write(r'$\sigma_2$')
    sigma_2 = st.slider('sigma_2', 1.0, 2.0, step=0.1)
    st.write('$\\rho$')
    rho_12 = st.slider('rho', -0.9, 0.9, step=0.1)

#%%

# Density of the univariate and of the multivariate normal distribution.
st.latex(r'''
f(x) = \frac{1}{\sqrt{2\pi} \sigma}
\exp\left( -\frac{1}{2}\left(\frac{x-\mu}{\sigma}\right)^{\!2}\,\right)
''')
st.latex(r'''
f(x) = \frac{1}{\left( 2 \pi \right)^{\frac{D}{2}}
\begin{vmatrix}
\Sigma
\end{vmatrix}^{\frac{1}{2}}}
\exp\left(
-\frac{1}{2}
\left( x - \mu \right)^{T} \Sigma^{-1} \left( x - \mu \right)
\right)
''')

#%% Grid and covariance matrix

# 101 x 101 grid on [-3, 3] x [-3, 3].
x1 = np.linspace(-3, 3, 101)
x2 = np.linspace(-3, 3, 101)
xx1, xx2 = np.meshgrid(x1, x2)
pos = np.dstack((xx1, xx2))   # last axis holds the (x1, x2) pair

# Covariance matrix built from the slider values.
Sigma = [[sigma_1**2, rho_12 * sigma_1 * sigma_2],
         [rho_12 * sigma_1 * sigma_2, sigma_2**2]]

# Zero-mean bivariate normal and its density on the grid.
rv = multivariate_normal([0, 0], Sigma)
PDF_zz = rv.pdf(pos)

#%%

Sigma = np.array(Sigma)

# Eigen-decomposition of the covariance matrix.
D, V = np.linalg.eig(Sigma)
D = np.diag(D)   # eigenvalues as a diagonal matrix

# Sigma is rounded for display only, like V and D below. This hides
# floating-point noise such as 1.2100000000000002.
Sigma_shown = np.around(Sigma, decimals=3)
st.latex(r'''\Sigma = \begin{bmatrix}%s & %s\\%s & %s\end{bmatrix}'''
         % tuple(Sigma_shown.ravel().tolist()))
st.latex(r'''\Sigma = V \Lambda V^{T}''')
st.latex(bmatrix(Sigma_shown) + '=' +
         bmatrix(np.around(V, decimals=3)) + '@' +
         bmatrix(np.around(D, decimals=3)) + '@' +
         bmatrix(np.around(V.T, decimals=3)))

#%% 3-D surface of the density

fig_surface = go.Figure(go.Surface(
    x=x1,
    y=x2,
    z=PDF_zz,
    colorscale='RdYlBu_r'))
fig_surface.update_layout(
    autosize=False,
    width=500,
    height=500)
st.plotly_chart(fig_surface)

#%% 2-D contour plot of the density

fig_contour = go.Figure(
    go.Contour(
        z=PDF_zz,
        x=x1,
        y=x2,
        colorscale='RdYlBu_r'
    ))

fig_contour.update_layout(
    autosize=False,
    width=500,
    height=500)

st.plotly_chart(fig_contour)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
155
Book4_Ch21_Python_Codes/Bk4_Ch21_01.ipynb
Normal file
155
Book4_Ch21_Python_Codes/Bk4_Ch21_01.ipynb
Normal file
@@ -0,0 +1,155 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "73bd968b-d970-4a05-94ef-4e7abf990827",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Chapter 21\n",
|
||||
"\n",
|
||||
"# 判断正定矩阵\n",
|
||||
"Book_4《矩阵力量》 | 鸢尾花书:从加减乘除到机器学习 (第二版)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "54195758-c635-45fc-be6a-ebae54b12d78",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"这段代码的功能是判断一个矩阵是否为正定矩阵。首先,正定矩阵 \\( A \\) 的定义要求其必须是对称矩阵,即满足 \\( A = A^T \\)。若矩阵 \\( A \\) 是对称矩阵,代码进一步检查是否可以对其进行 Cholesky 分解。Cholesky 分解是一种将正定矩阵 \\( A \\) 表示为下三角矩阵 \\( L \\) 和其转置 \\( L^T \\) 的操作,即\n",
|
||||
"\n",
|
||||
"$$\n",
|
||||
"A = L L^T\n",
|
||||
"$$\n",
|
||||
"\n",
|
||||
"若分解成功,则说明 \\( A \\) 是正定矩阵,函数返回 `True`;若分解失败(引发 `LinAlgError` 异常),则矩阵不是正定矩阵,函数返回 `False`。在这段代码中,示例矩阵\n",
|
||||
"\n",
|
||||
"$$\n",
|
||||
"A = \\begin{bmatrix} 1 & 0 \\\\ 0 & 0 \\end{bmatrix}\n",
|
||||
"$$\n",
|
||||
"\n",
|
||||
"不是正定矩阵,因此代码会输出 `False`。"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "53cb4618-5e9f-4388-b8e2-893534f3cfbe",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np # 导入 numpy 进行数值计算"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "74d9b36f-c406-4573-a99e-db75e9380e36",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 定义判断正定矩阵的函数"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "7abef436-2925-4561-90ae-dc85e1fd34f4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def is_pos_def(A): # 函数 is_pos_def 用于判断矩阵是否为正定矩阵\n",
|
||||
" if np.array_equal(A, A.T): # 检查矩阵是否为对称矩阵\n",
|
||||
" try:\n",
|
||||
" np.linalg.cholesky(A) # 尝试进行 Cholesky 分解\n",
|
||||
" return True # 若成功,则矩阵为正定矩阵\n",
|
||||
" except np.linalg.LinAlgError: # 若分解失败,捕获异常\n",
|
||||
" return False # 分解失败则矩阵不是正定矩阵\n",
|
||||
" else:\n",
|
||||
" return False # 若矩阵不对称,则直接返回 False"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7632f259-4cc2-40b5-8886-5c2c670b876c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 定义待检测的矩阵"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "11933a91-89ae-4529-b5f6-302fa7eebf2b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"A = np.array([[1, 0], \n",
|
||||
" [0, 0]]) # 定义矩阵 A"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "7bb2c23d-c6ef-4f9a-8c48-01f55905fa97",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## 打印结果"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "6f4d7b05-2a48-40b5-879a-12e58b6ff62a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"False\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(is_pos_def(A)) # 调用函数 is_pos_def,打印矩阵 A 是否为正定矩阵"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "85a80909-2aac-49ed-bb7a-f8cc6b80ee7d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ecd322f4-f919-4be2-adc3-69d28ef25e69",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
392
Book4_Ch21_Python_Codes/Bk4_Ch21_02.ipynb
Normal file
392
Book4_Ch21_Python_Codes/Bk4_Ch21_02.ipynb
Normal file
File diff suppressed because one or more lines are too long
76
Book4_Ch21_Python_Codes/Streamlit_Bk4_Ch21_02.py
Normal file
76
Book4_Ch21_Python_Codes/Streamlit_Bk4_Ch21_02.py
Normal file
@@ -0,0 +1,76 @@
|
||||
|
||||
###############
|
||||
# Authored by Weisheng Jiang
|
||||
# Book 4 | From Basic Arithmetic to Machine Learning
|
||||
# Published and copyrighted by Tsinghua University Press
|
||||
# Beijing, China, 2025
|
||||
###############
|
||||
|
||||
import streamlit as st # 导入 Streamlit,用于构建交互式 Web 应用
|
||||
import plotly.graph_objects as go # 导入 Plotly 的图形对象模块,用于绘图
|
||||
import sympy # 导入 SymPy,用于符号运算
|
||||
import numpy as np # 导入 NumPy,用于数值计算
|
||||
|
||||
def bmatrix(a):
    r"""Return a LaTeX ``bmatrix`` string for a NumPy array of at most 2 dimensions.

    Args:
        a: NumPy array (any object exposing ``.shape`` and a NumPy-style
            ``str`` representation).

    Returns:
        A newline-joined string: ``\begin{bmatrix}``, then one line per
        array row with the entries joined by ``' & '`` and terminated by
        ``\\``, then ``\end{bmatrix}``.

    Raises:
        ValueError: If ``a`` has more than two dimensions.
    """
    import sys  # local import: only needed for the print-option limits below

    if len(a.shape) > 2:
        raise ValueError('bmatrix 函数只支持二维矩阵')

    # str() of an array wraps rows that exceed the print line width and
    # abbreviates large arrays with '...'. Either would split or drop matrix
    # entries in the LaTeX output, so lift both limits while rendering.
    with np.printoptions(linewidth=sys.maxsize, threshold=sys.maxsize):
        text = str(a)

    # Strip the brackets; each remaining text line is one matrix row.
    rows = text.replace('[', '').replace(']', '').splitlines()

    latex_lines = [r'\begin{bmatrix}']
    latex_lines += ['  ' + ' & '.join(row.split()) + r'\\' for row in rows]
    latex_lines += [r'\end{bmatrix}']
    return '\n'.join(latex_lines)
|
||||
|
||||
# Sidebar: show the symbolic definitions and collect the three entries of the
# symmetric matrix A = [[a, b], [b, c]] from sliders.
# NOTE(review): indentation was lost in this view; the sliders are assumed to
# live inside the sidebar block together with the two formulas — confirm.
with st.sidebar:
    st.latex(r'''A = \begin{bmatrix} a & b\\ b & c \end{bmatrix}''')
    st.latex(r'''f(x_1,x_2) = ax_1^2 + 2bx_1x_2 + cx_2^2''')
    a = st.slider('a', -2.0, 2.0, step=0.1)
    b = st.slider('b', -2.0, 2.0, step=0.1)
    c = st.slider('c', -2.0, 2.0, step=0.1)

# Evaluation grid: 101 x 101 points on [-2, 2] x [-2, 2].
x1_ = np.linspace(-2, 2, 101)
x2_ = np.linspace(-2, 2, 101)
xx1, xx2 = np.meshgrid(x1_, x2_)

# Symbols for the quadratic form and the numeric matrix built from the sliders.
x1, x2 = sympy.symbols('x1 x2')
A = np.array([[a, b],
              [b, c]])

# Eigen-decomposition of A; the eigenvalues are placed on a diagonal matrix
# so the product V @ D @ V.T can be displayed.
D, V = np.linalg.eig(A)
D = np.diag(D)

# Show A, the decomposition formula, and the numeric factors (3 decimals).
st.latex(r'''A = \begin{bmatrix}%s & %s\\%s & %s\end{bmatrix}''' % (a, b, b, c))
st.latex(r'''A = V \Lambda V^{T}''')
st.latex(bmatrix(A) + '=' +
         bmatrix(np.around(V, decimals=3)) + '@' +
         bmatrix(np.around(D, decimals=3)) + '@' +
         bmatrix(np.around(V.T, decimals=3)))

# Quadratic form f(x1, x2) = a*x1^2 + 2*b*x1*x2 + c*x2^2 as a SymPy expression.
f_x = a * x1**2 + 2 * b * x1 * x2 + c * x2**2
st.latex(r'''f(x_1,x_2) = ''')
st.write(f_x)

# Numeric evaluation on the grid. If the slider values make the expression
# collapse to a constant (e.g. all coefficients zero), the lambdified function
# returns a scalar instead of a grid; adding a zero grid restores the
# (101, 101) shape that Surface/Contour need for z.
f_x_fcn = sympy.lambdify([x1, x2], f_x)
ff_x = f_x_fcn(xx1, xx2) + np.zeros_like(xx1)

# 3D surface of the quadratic form.
fig_surface = go.Figure(go.Surface(
    x=x1_,
    y=x2_,
    z=ff_x,
    colorscale='RdYlBu_r'))
fig_surface.update_layout(
    autosize=False,  # fixed 500 x 500 px canvas
    width=500,
    height=500)
st.plotly_chart(fig_surface)

# 2D contour plot of the same values.
fig_contour = go.Figure(
    go.Contour(
        z=ff_x,
        x=x1_,
        y=x2_,
        colorscale='RdYlBu_r'
    ))
fig_contour.update_layout(
    autosize=False,  # fixed 500 x 500 px canvas
    width=500,
    height=500)
st.plotly_chart(fig_contour)
|
||||
|
||||
90
Book4_Ch21_Python_Codes/Streamlit_Bk4_Ch21_03.py
Normal file
90
Book4_Ch21_Python_Codes/Streamlit_Bk4_Ch21_03.py
Normal file
@@ -0,0 +1,90 @@
|
||||
|
||||
###############
|
||||
# Authored by Weisheng Jiang
|
||||
# Book 4 | From Basic Arithmetic to Machine Learning
|
||||
# Published and copyrighted by Tsinghua University Press
|
||||
# Beijing, China, 2025
|
||||
###############
|
||||
|
||||
import numpy as np # 导入 NumPy,用于数值计算
|
||||
from sympy import lambdify, diff, exp, latex, simplify, symbols # 从 SymPy 导入符号计算相关模块
|
||||
import plotly.figure_factory as ff # 从 Plotly 导入工厂方法,用于绘制梯度向量和流线
|
||||
import plotly.graph_objects as go # 从 Plotly 导入图形对象模块,用于绘图
|
||||
import streamlit as st # 导入 Streamlit,用于构建交互式 Web 应用
|
||||
|
||||
# Symbols and the fine evaluation grid (301 x 301 points on [-3, 3] x [-3, 3]).
x1, x2 = symbols('x1 x2')
num = 301
x1_array = np.linspace(-3, 3, num)
x2_array = np.linspace(-3, 3, num)
xx1, xx2 = np.meshgrid(x1_array, x2_array)

# Bivariate test function built from three Gaussian-weighted terms.
f_x = 3 * (1 - x1)**2 * exp(-(x1**2) - (x2 + 1)**2) \
    - 10 * (x1 / 5 - x1**3 - x2**5) * exp(-x1**2 - x2**2) \
    - 1 / 3 * exp(-(x1 + 1)**2 - x2**2)

# Numeric version of f and its values on the fine grid.
f_x_fcn = lambdify([x1, x2], f_x)
f_zz = f_x_fcn(xx1, xx2)

st.latex('f(x_1, x_2) = ' + latex(f_x))

# Gradient: partial derivatives of f with respect to x1 and x2, lambdified
# so that grad_fcn returns the pair [df/dx1, df/dx2].
grad_f = [diff(f_x, var) for var in (x1, x2)]
grad_fcn = lambdify([x1, x2], grad_f)

# Coarse 40 x 40 grid on which the gradient field is sampled for plotting.
x1__ = np.linspace(-3, 3, 40)
x2__ = np.linspace(-3, 3, 40)
xx1_, xx2_ = np.meshgrid(x1__, x2__)
V = grad_fcn(xx1_, xx2_)

# 3D surface of f on the fine grid.
fig_surface = go.Figure(go.Surface(
    x=x1_array,
    y=x2_array,
    z=f_zz,
    showscale=False,
    colorscale='RdYlBu_r'))
fig_surface.update_layout(
    autosize=False,  # fixed 800 x 600 px canvas
    width=800,
    height=600)
st.plotly_chart(fig_surface)

# Quiver and streamline figures of the gradient field; only their first
# trace is reused below, combined with the contour trace.
f = ff.create_quiver(xx1_, xx2_, V[0], V[1], arrow_scale=.1, scale=0.03)
f_stream = ff.create_streamline(x1__, x2__, V[0], V[1], arrow_scale=.1)
trace1 = f.data[0]
trace3 = f_stream.data[0]
trace2 = go.Contour(
    x=x1_array,
    y=x2_array,
    z=f_zz,
    showscale=False,
    colorscale='RdYlBu_r')


def _overlay_figure(traces):
    """Build the shared 800x800 figure (axes lines, [-2, 2] view) from traces."""
    # A plain go.Figure is sufficient for st.plotly_chart; FigureWidget is a
    # notebook widget and would pull in an extra widget dependency.
    figure = go.Figure(traces)
    figure.update_layout(
        autosize=False,
        width=800,
        height=800)
    figure.add_hline(y=0, line_color='black')
    figure.add_vline(x=0, line_color='black')
    figure.update_xaxes(range=[-2, 2])
    figure.update_yaxes(range=[-2, 2])
    figure.update_coloraxes(showscale=False)
    return figure


# Gradient arrows combined with the contour plot.
data = [trace1, trace2]
fig = _overlay_figure(data)
st.plotly_chart(fig)

# Streamlines combined with the contour plot.
data2 = [trace3, trace2]
fig2 = _overlay_figure(data2)
st.plotly_chart(fig2)
|
||||
571
Book4_Ch22_Python_Codes/Bk4_Ch22_01.ipynb
Normal file
571
Book4_Ch22_Python_Codes/Bk4_Ch22_01.ipynb
Normal file
File diff suppressed because one or more lines are too long
587
Book4_Ch24_Python_Codes/Bk4_Ch24_01.ipynb
Normal file
587
Book4_Ch24_Python_Codes/Bk4_Ch24_01.ipynb
Normal file
@@ -0,0 +1,587 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "73bd968b-d970-4a05-94ef-4e7abf990827",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Chapter 24\n",
|
||||
"\n",
|
||||
"# 数据分解\n",
|
||||
"Book_4《矩阵力量》 | 鸢尾花书:从加减乘除到机器学习 (第二版)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "84c7e223-f9df-4575-a7d8-ab27812fea5c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"这段代码对鸢尾花数据集的特征矩阵 $X$ 进行了多种矩阵操作和分解,以便分析数据的结构和特性。首先,代码计算了 $X$ 的 **Gram 矩阵** $G = X^T X$,其元素是各特征列向量两两之间的内积。接着,基于 $G$ 构造了 **余弦相似度矩阵** $C$,通过用各特征列向量的范数对 $G$ 进行归一化,使得 $C$ 中的每个元素代表两个特征列向量之间的余弦相似度。随后,代码计算数据的 **质心**(均值向量) $E(X)$ 并生成 **去均值数据矩阵** $X_c = X - E(X)$,使数据中心化,以便消除均值对数据的影响。\n",
|
||||
"\n",
|
||||
"代码进一步计算了 **协方差矩阵** $\\Sigma = \\frac{1}{N-1} X_c^T X_c$(`DataFrame.cov()` 默认计算样本协方差,分母为 $N-1$)和 **相关矩阵** $\\rho$,分别表示特征间的协方差和标准化后的相关性。\n",
|
||||
"\n",
|
||||
"接下来,代码进行了多种矩阵分解,包括:\n",
|
||||
"\n",
|
||||
"1. **QR 分解**:将原始矩阵 $X$ 分解为一个正交矩阵 $Q$ 和一个上三角矩阵 $R$,满足 $X = QR$。\n",
|
||||
" \n",
|
||||
"2. **Cholesky 分解**:对 Gram 矩阵 $G$ 和协方差矩阵 $\\Sigma$ 进行分解,得到其对应的下三角矩阵 $L$,使得 $G = LL^T$ 和 $\\Sigma = LL^T$。\n",
|
||||
" \n",
|
||||
"3. **特征值分解**:对 Gram 矩阵 $G$、协方差矩阵 $\\Sigma$ 和相关矩阵 $\\rho$ 进行特征值分解,得到其特征值(对角化的 $\\Lambda$ 矩阵)和特征向量矩阵 $V$,满足 $G = V \\Lambda V^T$、$\\Sigma = V \\Lambda V^T$ 和 $\\rho = V \\Lambda V^T$。\n",
|
||||
"\n",
|
||||
"4. **奇异值分解(SVD)**:对原始数据 $X$、去均值数据 $X_c$ 和标准化数据 $Z_X$ 分别进行 SVD 分解,得到分解形式 $X = U S V^T$,其中 $U$ 和 $V$ 分别表示数据的左右奇异向量矩阵,$S$ 是包含奇异值的对角矩阵。SVD 提供了数据的主成分方向和大小,用于后续的主成分分析(PCA)或特征提取。\n",
|
||||
"\n",
|
||||
"通过这些分解和操作,代码全面分析了数据的相似性、协方差结构、相关性、主成分和特征方向,为理解数据的内在结构提供了重要参考。"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "f95b3fca-b77f-4e10-a84d-62f6876de01a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np # 导入 numpy 进行数值计算\n",
|
||||
"import matplotlib.pyplot as plt # 导入 matplotlib 用于绘图\n",
|
||||
"import pandas as pd # 导入 pandas 进行数据操作\n",
|
||||
"from sklearn.datasets import load_iris # 从 sklearn 加载鸢尾花数据集"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "c3ab92a3-7a2b-4655-b39f-41014f0bf0c3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from numpy.linalg import inv # 导入 inv 函数用于矩阵求逆\n",
|
||||
"from scipy.stats import zscore # 导入 zscore 函数用于标准化\n",
|
||||
"from numpy.linalg import qr # 导入 qr 函数进行 QR 分解\n",
|
||||
"from numpy.linalg import cholesky as chol # 导入 cholesky 函数用于 Cholesky 分解\n",
|
||||
"from numpy.linalg import eig # 导入 eig 函数进行特征值分解\n",
|
||||
"from numpy.linalg import svd # 导入 svd 函数用于奇异值分解"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c401295c-eabb-4713-94aa-2311ec8a972d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 加载数据"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "2397142d-b32e-465f-834f-65b06c127cea",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"iris = load_iris() # 从 sklearn 加载鸢尾花数据集"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "313bf88d-fcfd-4947-beb3-bcb2d7bc56e3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X = iris.data # 特征矩阵 X\n",
|
||||
"y = iris.target # 目标标签 y"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "91f49a78-e515-4201-9eb3-d8993b32471d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"feature_names = ['Sepal length, x1', 'Sepal width, x2',\n",
|
||||
" 'Petal length, x3', 'Petal width, x4'] # 特征名称"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "730bcf6c-1e64-4bda-85fe-80a14077e339",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 将特征矩阵 X 转换为 DataFrame 格式"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "c86246d1-a83b-4495-a982-36e715eeae6e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X_df = pd.DataFrame(X, columns=feature_names)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "54cef963-570f-47c3-b5dd-222fdc550707",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 原始数据 X"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "802ac1d9-148c-4e3b-b00f-9f1ee45648d1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X = X_df.to_numpy() # 转换 DataFrame 为 numpy 数组"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "877eeaab-7719-4b83-b829-280143f81c5a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Gram 矩阵 G"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "775d5366-beec-4349-bae2-7726b9735f6e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"G = X.T @ X # 计算 Gram 矩阵,G = X^T X"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "967a5f97-96e3-43c5-9527-76d2879e8f03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 余弦相似度矩阵 C"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "3639f25d-1ad5-4646-bfd5-675cf79fd512",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 使用特征范数计算相似度\n",
|
||||
"S_norm = np.diag(np.sqrt(np.diag(G))) # 生成缩放矩阵,对角线元素为每列特征的范数"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "2052f952-8ee5-4914-9bb9-33bce33b92b5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"C = inv(S_norm) @ G @ inv(S_norm) # 计算余弦相似度矩阵 C"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fa9f5362-0b41-4a6c-8ca1-2324672bd552",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 数据矩阵的质心 E(X)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "7e51dd81-5415-46ee-be03-9bbfba20be8a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"E_X = X_df.mean().to_frame().T # 计算 X 的均值,并转换为单行 DataFrame"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "83005aa2-ba44-4ee6-8bcd-c70f86d0c366",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 数据去均值处理 X_c"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "e98077f7-6426-427d-a6f4-8a85bef517c4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X_c = X_df.sub(X_df.mean()) # 每列减去其均值,得到去均值矩阵 X_c"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3f96ef51-d242-4f3e-a95d-c4174d394d63",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 协方差矩阵 Sigma"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "8aae2ef4-1803-4522-9d72-0749936c3bab",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"SIGMA = X_df.cov() # 计算协方差矩阵 SIGMA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2aee8ac6-dd46-446f-82d7-4f953a3b03aa",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 相关矩阵 P"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "b79c796e-be1e-4ace-be31-87af4c1daf84",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"RHO = X_df.corr() # 计算相关矩阵 RHO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b087fa60-77ef-4280-8e20-d4efa134ba69",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 数据标准化 Z_X"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "8b473969-650c-4c52-ae8d-f89ce7b9dce0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"Z_X = zscore(X_df) # 对 X 的每列标准化,使其均值为 0,标准差为 1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a70b1f17-e178-4f52-9346-e330986b22b0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## QR 分解"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "fc15ec6d-931a-4472-b1d0-7d1aaa1285dd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"Q, R = qr(X_df, mode='reduced') # 对 X 进行 QR 分解,mode='reduced' 保留最小矩阵维度"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d1977ba7-04d1-4d11-b9a4-dc5b0e1ff5c8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Cholesky 分解"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "7f129525-f4e7-48b7-ba19-bc49ad4515e3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"L_G = chol(G) # 对 Gram 矩阵 G 进行 Cholesky 分解,得到下三角矩阵 L_G"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "293cfc24-8d6c-48b5-8b55-0fd2173f10de",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"R_G = L_G.T # 上三角矩阵 R_G 为 L_G 的转置"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "44091d77-46f4-416f-8305-d66438fc9d62",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 协方差矩阵 Sigma 的 Cholesky 分解"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "959d25e3-0ab4-40c1-b79d-f6388712b381",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"L_Sigma = chol(SIGMA) # 对协方差矩阵 SIGMA 进行 Cholesky 分解"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "94d8ce95-d99d-43a1-b244-402cf2e07f28",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"R_Sigma = L_Sigma.T # 上三角矩阵 R_Sigma 为 L_Sigma 的转置"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4f02b402-a8bc-464a-9006-4dca94b9b219",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Gram 矩阵 G 的特征值分解"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "b63e3321-afb5-4b9e-ac92-a74c39537169",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"Lambs_G, V_G = eig(G) # 对 G 进行特征值分解,得到特征值 Lambs_G 和特征向量 V_G"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "40135be7-b292-4765-8a8b-866b4f22418f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"Lambs_G = np.diag(Lambs_G) # 将特征值转换为对角矩阵形式"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "559af984-f9b8-4f34-8c69-af3035dfb589",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 协方差矩阵 Sigma 的特征值分解"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"id": "0280dc12-1245-4b5d-8099-8fe3abc94ccc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"Lambs_sigma, V_sigma = eig(SIGMA) # 对 SIGMA 进行特征值分解"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "7941e036-5e24-4a15-8612-20dfd3d60779",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"Lambs_sigma = np.diag(Lambs_sigma) # 将特征值转换为对角矩阵形式"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d387720b-43f9-4c24-a918-48eb686acb8a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 相关矩阵 P 的特征值分解"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"id": "7227d988-df95-44f1-9986-bd3f0922d7da",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"Lambs_P, V_P = eig(RHO) # 对相关矩阵 RHO 进行特征值分解"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "a8a40ce6-cdde-4fea-be3e-1d34367b9aac",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"Lambs_P = np.diag(Lambs_P) # 将特征值转换为对角矩阵形式"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2891d64a-1faa-47a6-bcba-5c56611e96cf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 原始数据 X 的 SVD 分解"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"id": "37ffec3a-f1ea-45dc-9fa0-be3e27bc15e7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"U_X, S_X_, V_X = svd(X_df, full_matrices=False) # 对 X 进行 SVD 分解"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"id": "472ff2d6-e2fe-4ba5-b341-cc13ebf416fe",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"V_X = V_X.T # 转置 V_X"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"id": "fd6103a7-12b4-431f-9ad5-1d7d4a3c6564",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"S_X = np.diag(S_X_) # 将奇异值转换为对角矩阵"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "93dc741b-c4b5-48ae-a32e-8b80bafa2ae6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 去均值数据 X_c 的 SVD 分解"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"id": "58e1f3b8-6838-410e-bbfa-1b538b8713c1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"U_Xc, S_Xc, V_Xc = svd(X_c, full_matrices=False) # 对去均值后的数据 X_c 进行 SVD 分解"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"id": "98a5eb4a-8112-4021-9339-27779343347c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"V_Xc = V_Xc.T # 转置 V_Xc"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"id": "fb80d669-ea06-4ee3-a74e-1e3b8a00e954",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"S_Xc = np.diag(S_Xc) # 将奇异值转换为对角矩阵"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9b015450-0229-445f-bf61-487d53fa3efe",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 标准化数据 Z_X 的 SVD 分解"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"id": "689bb4cf-2b59-45ca-b8e4-acaf31d8e139",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"U_Z, S_Z, V_Z = svd(Z_X, full_matrices=False) # 对标准化后的数据 Z_X 进行 SVD 分解"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 34,
|
||||
"id": "a296884a-2df5-4b34-b1ad-ffec488c2f37",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"V_Z = V_Z.T # 转置 V_Z"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"id": "a3bd573a-de20-4d7c-81a2-f88c2bcf0263",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"S_Z = np.diag(S_Z) # 将奇异值转换为对角矩阵"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "85a80909-2aac-49ed-bb7a-f8cc6b80ee7d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ecd322f4-f919-4be2-adc3-69d28ef25e69",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user