mirror of
https://github.com/Estom/notes.git
synced 2026-04-02 02:20:25 +08:00
机器学习2
This commit is contained in:
@@ -0,0 +1,130 @@
|
||||
# 聚类
|
||||
## cluster.vq
|
||||
Provides routines for k-means clustering, generating code books from k-means models and quantizing vectors by comparing them with centroids in a code book.
|
||||
|
||||
function | introduction
|
||||
----|----
|
||||
whiten(obs[, check_finite]) | Normalize a group of observations on a per feature basis.每个特征(列)除以该特征在所有观测上的标准差。
|
||||
vq(obs, code_book[, check_finite]) | Assign codes from a code book to observations.
|
||||
kmeans(obs, k_or_guess[, iter, thresh, …]) | Performs k-means on a set of observation vectors forming k clusters.
|
||||
kmeans2(data, k[, iter, thresh, minit, …]) | Classify a set of observations into k clusters using the k-means algorithm.
|
||||
|
||||
## cluster.hierarchy
|
||||
|
||||
Hierarchical clustering (scipy.cluster.hierarchy)
|
||||
|
||||
* These functions cut hierarchical clusterings into flat clusterings or find the roots of the forest formed by a cut by providing the flat cluster ids of each observation.
|
||||
|
||||
functions | introduction
|
||||
----|----
|
||||
fcluster(Z, t[, criterion, depth, R, monocrit]) | Form flat clusters from the hierarchical clustering defined by the given linkage matrix.
|
||||
fclusterdata(X, t[, criterion, metric, …]) | Cluster observation data using a given metric.
|
||||
leaders(Z, T) | Return the root nodes in a hierarchical clustering.
|
||||
|
||||
* These are routines for agglomerative clustering.
|
||||
|
||||
functions | introduction
|
||||
----|----
|
||||
linkage(y[, method, metric, optimal_ordering]) | Perform hierarchical/agglomerative clustering.
|
||||
single(y) | Perform single/min/nearest linkage on the condensed distance matrix y.
|
||||
complete(y) | Perform complete/max/farthest point linkage on a condensed distance matrix.
|
||||
average(y) | Perform average/UPGMA linkage on a condensed distance matrix.
|
||||
weighted(y) | Perform weighted/WPGMA linkage on the condensed distance matrix.
|
||||
centroid(y) | Perform centroid/UPGMC linkage.
|
||||
median(y) | Perform median/WPGMC linkage.
|
||||
ward(y) | Perform Ward’s linkage on a condensed distance matrix.
|
||||
|
||||
* These routines compute statistics on hierarchies.
|
||||
|
||||
|
||||
functions | introduction
|
||||
----|----
|
||||
cophenet(Z[, Y]) | Calculate the cophenetic distances between each observation in the hierarchical clustering defined by the linkage Z.
|
||||
from_mlab_linkage(Z) | Convert a linkage matrix generated by MATLAB(TM) to a new linkage matrix compatible with this module.
|
||||
inconsistent(Z[, d]) | Calculate inconsistency statistics on a linkage matrix.
|
||||
maxinconsts(Z, R) | Return the maximum inconsistency coefficient for each non-singleton cluster and its children.
|
||||
maxdists(Z) | Return the maximum distance between any non-singleton cluster.
|
||||
maxRstat(Z, R, i) | Return the maximum statistic for each non-singleton cluster and its children.
|
||||
|
||||
to_mlab_linkage(Z) | Convert a linkage matrix to a MATLAB(TM) compatible one.
|
||||
|
||||
* Routines for visualizing flat clusters.
|
||||
|
||||
functions | introduction
|
||||
----|----
|
||||
dendrogram(Z[, p, truncate_mode, …]) | Plot the hierarchical clustering as a dendrogram.
|
||||
|
||||
* These are data structures and routines for representing hierarchies as tree objects.
|
||||
|
||||
functions | introduction
|
||||
----|----
|
||||
ClusterNode(id[, left, right, dist, count]) | A tree node class for representing a cluster.
|
||||
leaves_list(Z) | Return a list of leaf node ids.
|
||||
to_tree(Z[, rd]) | Convert a linkage matrix into an easy-to-use tree object.
|
||||
cut_tree(Z[, n_clusters, height]) | Given a linkage matrix Z, return the cut tree.
|
||||
optimal_leaf_ordering(Z, y[, metric]) | Given a linkage matrix Z and distance, reorder the cut tree.
|
||||
|
||||
* These are predicates for checking the validity of linkage and inconsistency matrices as well as for checking isomorphism of two flat cluster assignments.
|
||||
|
||||
functions | introduction
|
||||
----|----
|
||||
is_valid_im(R[, warning, throw, name]) | Return True if the inconsistency matrix passed is valid.
|
||||
is_valid_linkage(Z[, warning, throw, name]) | Check the validity of a linkage matrix.
|
||||
is_isomorphic(T1, T2) | Determine if two different cluster assignments are equivalent.
|
||||
is_monotonic(Z) | Return True if the linkage passed is monotonic.
|
||||
correspond(Z, Y) | Check for correspondence between linkage and condensed distance matrices.
|
||||
num_obs_linkage(Z) | Return the number of original observations of the linkage matrix passed.
|
||||
|
||||
* Utility routines for plotting:
|
||||
|
||||
|
||||
functions | introduction
|
||||
----|----
|
||||
set_link_color_palette(palette) | Set list of matplotlib color codes for use by dendrogram.
|
||||
|
||||
## 原理
|
||||
|
||||
K均值聚类是一种在一组未标记数据中查找聚类和聚类中心的方法。 直觉上,我们可以将一个群集(簇聚)看作 - 包含一组数据点,其点间距离与群集外点的距离相比较小。 给定一个K中心的初始集合,K均值算法重复以下两个步骤 -
|
||||
|
||||
* 对于每个中心,比其他中心更接近它的训练点的子集(其聚类)被识别出来。
|
||||
* 计算每个聚类中数据点的每个要素的平均值,并且此平均向量将成为该聚类的新中心。
|
||||
|
||||
|
||||
重复这两个步骤,直到中心不再移动或分配不再改变。 然后,可以将新点x分配给最接近的原型的群集。 SciPy库通过cluster包提供了K-Means算法的良好实现。 下面来了解如何使用它。
|
||||
|
||||
## 实现
|
||||
|
||||
* 导入K-Means
|
||||
```py
|
||||
from scipy.cluster.vq import kmeans,vq,whiten
|
||||
```
|
||||
* 数据生成
|
||||
```py
|
||||
from numpy import vstack,array
|
||||
from numpy.random import rand
|
||||
|
||||
# data generation with three features
|
||||
data = vstack((rand(100,3) + array([.5,.5,.5]),rand(100,3)))
|
||||
```
|
||||
* 根据每个特征标准化一组观察值。 在运行K-Means之前,使用白化重新缩放观察集的每个特征维度是有好处的。 每个特征除以其在所有观测值上的标准偏差,使其具有单位方差。白化数据:
|
||||
```py
|
||||
# whitening of data
|
||||
data = whiten(data)
|
||||
print (data)
|
||||
```
|
||||
* 用三个集群计算K均值:使用以下代码计算三个群集的K均值。
|
||||
```py
|
||||
# computing K-Means with K = 3 (3 clusters)
|
||||
centroids,_ = kmeans(data,3)
|
||||
```
|
||||
* 上述代码对形成K个簇的一组观测向量执行K均值。 K-Means算法不断调整质心,直到无法再取得足够的进展,即自上一次迭代以来失真的变化小于某个阈值。 在这里,可以通过使用下面给出的代码打印centroids变量来观察簇。
|
||||
```py
|
||||
print(centroids)
|
||||
```
|
||||
* 使用下面给出的代码将每个值分配给一个集群。
|
||||
```py
|
||||
# assign each sample to a cluster
|
||||
clx,_ = vq(data,centroids)
|
||||
```
|
||||
* vq函数将'M'×'N'的obs数组中的每个观察向量与centroids进行比较,并将观察值分配给最近的聚类。 它返回每个观察所属的聚类以及失真。 我们也可以检查失真。使用下面的代码检查每个观察的聚类。
|
||||
4
Python/scipy/18constant.md
Normal file
4
Python/scipy/18constant.md
Normal file
@@ -0,0 +1,4 @@
|
||||
## 数字常量
|
||||
|
||||
## 物理常量
|
||||
|
||||
@@ -1,10 +0,0 @@
|
||||
## index trics
|
||||
|
||||
## shape manipulation
|
||||
|
||||
## polynomials
|
||||
|
||||
## vectorizing functions
|
||||
|
||||
## type handling
|
||||
|
||||
@@ -3,4 +3,32 @@
|
||||
## Bessel functions of real order
|
||||
> Bessel函数
|
||||
|
||||
## Cython Bindings for Special Functions
|
||||
$$
|
||||
x^2\frac{d^2y}{dx^2}+x\frac{dy}{dx}+(x^2-\alpha^2)y=0
|
||||
$$
|
||||
```py
|
||||
from scipy import special
import numpy as np
|
||||
def drumhead_height(n, k, distance, angle, t):
|
||||
kth_zero = special.jn_zeros(n, k)[-1]
|
||||
return np.cos(t) * np.cos(n*angle) * special.jn(n, distance*kth_zero)
|
||||
theta = np.r_[0:2*np.pi:50j]
|
||||
radius = np.r_[0:1:50j]
|
||||
x = np.array([r * np.cos(theta) for r in radius])
|
||||
y = np.array([r * np.sin(theta) for r in radius])
|
||||
z = np.array([drumhead_height(1, 1, r, theta, 0.5) for r in radius])
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
from mpl_toolkits.mplot3d import Axes3D
|
||||
from matplotlib import cm
|
||||
fig = plt.figure()
|
||||
ax = Axes3D(fig)
|
||||
ax.plot_surface(x, y, z, rstride=1, cstride=1, cmap='RdBu_r', vmin=-0.5, vmax=0.5)
|
||||
ax.set_xlabel('X')
|
||||
ax.set_ylabel('Y')
|
||||
ax.set_zlabel('Z')
|
||||
plt.show()
|
||||
```
|
||||
|
||||
## Cython Bindings for Special Functions
|
||||
> scipy.special.cython_special
|
||||
|
||||
|
||||
@@ -1,13 +1,129 @@
|
||||
## 线性代数
|
||||
> 主要修改二维数组
|
||||
# 线性代数
|
||||
|
||||
## 简介
|
||||
SciPy是使用优化的ATLAS LAPACK和BLAS库构建的。 它具有非常快的线性代数能力。 所有这些线性代数例程都需要一个可以转换为二维数组的对象。 这些例程的输出也是一个二维数组。
|
||||
|
||||
### SciPy.linalg与NumPy.linalg
|
||||
|
||||
scipy.linalg包含numpy.linalg中的所有函数。 另外,scipy.linalg还有一些不在numpy.linalg中的高级函数。 在numpy.linalg上使用scipy.linalg的另一个优点是它总是用BLAS/LAPACK支持编译,而对于NumPy,这是可选的。 因此,根据NumPy的安装方式,SciPy版本可能会更快。
|
||||
|
||||
## 线性方程组
|
||||
|
||||
## 行列式
|
||||
### 数学实例
|
||||
scipy.linalg.solve函数用于求解线性方程组 a * x + b * y = Z 中未知的x、y值。
|
||||
作为一个例子,假设需要解下面的联立方程。
|
||||
```
|
||||
x+3y+5z=10
|
||||
2x+5y+z=8
|
||||
2x+3y+8z=3
|
||||
```
|
||||
要求解x,y,z值的上述方程式,可以使用矩阵求逆来求解向量,如下所示。
|
||||
$$
|
||||
A[x,y,z]^T=[10,8,3]^T\\
|
||||
[x,y,z]^T=A^{-1}[10,8,3]^T
|
||||
$$
|
||||
|
||||
## 特征值特征向量
|
||||
### 编程实现
|
||||
但是,最好使用linalg.solve命令,该命令可以更快,更稳定。求解函数采用两个输入'a'和'b',其中'a'表示系数,'b'表示相应的右侧值并返回解矩阵。
|
||||
```py
|
||||
#importing the scipy and numpy packages
|
||||
from scipy import linalg
|
||||
import numpy as np
|
||||
|
||||
#Declaring the numpy arrays
|
||||
a = np.array([[3, 2, 0], [1, -1, 0], [0, 5, 1]])
|
||||
b = np.array([2, 4, -1])
|
||||
|
||||
#Passing the values to the solve function
|
||||
x = linalg.solve(a, b)
|
||||
|
||||
#printing the result array
|
||||
print (x)
|
||||
```
|
||||
执行上面示例代码,得到以下结果
|
||||
```py
|
||||
[ 2. -2. 9.]
|
||||
```
|
||||
|
||||
|
||||
## 行列式
|
||||
|
||||
|
||||
方阵A的行列式通常表示为| A |并且是线性代数中经常使用的量。 在SciPy中,这是使用det()函数计算的。 它将矩阵作为输入并返回一个标量值。
|
||||
```py
|
||||
#importing the scipy and numpy packages
|
||||
from scipy import linalg
|
||||
import numpy as np
|
||||
|
||||
#Declaring the numpy array
|
||||
A = np.array([[1,2],[3,4]])
|
||||
|
||||
#Passing the values to the det function
|
||||
x = linalg.det(A)
|
||||
|
||||
#printing the result
|
||||
print (x)
|
||||
|
||||
# 执行上面示例代码,得到以下结果 -
|
||||
-2.0
|
||||
```
|
||||
|
||||
## 特征值和特征向量
|
||||
|
||||
特征向量问题是最常用的线性代数运算之一。 我们可以通过考虑以下关系式来找到方阵(A)的特征值(λ)和相应的特征向量(v)
|
||||
```
|
||||
Av = λv
|
||||
```
|
||||
scipy.linalg.eig从普通或广义特征值问题计算特征值。 该函数返回特征值和特征向量。
|
||||
```py
|
||||
#importing the scipy and numpy packages
|
||||
from scipy import linalg
|
||||
import numpy as np
|
||||
|
||||
#Declaring the numpy array
|
||||
A = np.array([[1,2],[3,4]])
|
||||
|
||||
#Passing the values to the eig function
|
||||
l, v = linalg.eig(A)
|
||||
|
||||
#printing the result for eigen values
|
||||
print (l)
|
||||
|
||||
#printing the result for eigen vectors
|
||||
print (v)
|
||||
```
|
||||
执行上面示例代码,得到以下结果 -
|
||||
```
|
||||
[-0.37228132+0.j 5.37228132+0.j]
|
||||
[[-0.82456484 -0.41597356]
|
||||
[ 0.56576746 -0.90937671]]
|
||||
```
|
||||
|
||||
## 奇异值分解

奇异值分解(SVD)可以被认为是特征值问题向非方阵的扩展。
|
||||
scipy.linalg.svd将矩阵'a'分解为两个酉矩阵'U'和'Vh',以及一个奇异值(实数,非负)的一维数组's',使得a == U @ S @ Vh,其中'S'是形状合适、主对角线为's'、其余元素为零的矩阵。
|
||||
|
||||
```py
|
||||
#importing the scipy and numpy packages
|
||||
from scipy import linalg
|
||||
import numpy as np
|
||||
|
||||
#Declaring the numpy array
|
||||
a = np.random.randn(3, 2) + 1.j*np.random.randn(3, 2)
|
||||
|
||||
#Passing the values to the svd function
|
||||
U, s, Vh = linalg.svd(a)
|
||||
|
||||
# printing the result
|
||||
print (U, Vh, s)
|
||||
|
||||
# 执行上面示例代码,得到以下结果 -
|
||||
[[-0.60142679+0.28212127j 0.35719830-0.03260559j 0.61548126-0.22632383j]
|
||||
[-0.00477296+0.44250532j 0.64058557+0.15734719j -0.40414313+0.45357092j]
|
||||
[ 0.46360086+0.38462177j -0.18611686+0.6337182j 0.44311251+0.06747886j]] [[ 0.98724353+0.j -0.01113675+0.15882756j]
|
||||
[-0.15921753+0.j -0.06905445+0.9848255j ]] [ 2.04228408 1.33798044]
|
||||
|
||||
```
|
||||
|
||||
## 奇异值分解
|
||||
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
# 说明
|
||||
|
||||
别纠结了,这一部分,就直接参考官方的教程跟api文档就好了,不用学习。你需要学的是数学。然后每次遇到数学问题,查手册解决。
|
||||
别纠结了,这一部分,就直接参考官方的教程跟api文档就好了,不用学习。你需要学的是数学。然后每次遇到数学问题,查手册解决。
|
||||
|
||||
别写了,查看文档就好。浪费时间
|
||||
|
||||
21
Python/scipy/bessel_test.py
Normal file
21
Python/scipy/bessel_test.py
Normal file
@@ -0,0 +1,21 @@
|
||||
from scipy import special
|
||||
import numpy as np
|
||||
def drumhead_height(n, k, distance, angle, t):
    """Return the drumhead displacement for mode (n, k) at a point and time.

    The result is ``cos(t) * cos(n * angle) * J_n(distance * z)``, where
    ``J_n`` is the Bessel function of the first kind of order ``n`` and
    ``z`` is the last of the first ``k`` zeros reported by
    ``special.jn_zeros(n, k)``.

    Parameters
    ----------
    n : int
        Order of the Bessel function (also the angular multiplier).
    k : int
        How many zeros of ``J_n`` to request; the last one is used.
    distance : float or array_like
        Radial coordinate that is scaled by the selected zero.
    angle : float or array_like
        Angular coordinate.
    t : float or array_like
        Time argument of the cosine factor.

    Returns
    -------
    float or numpy.ndarray
        Product of the time, angular and radial factors.
    """
    # jn_zeros yields the first k zeros, so the final entry is the k-th one.
    zero = special.jn_zeros(n, k)[-1]
    time_part = np.cos(t)
    angular_part = np.cos(n*angle)
    radial_part = special.jn(n, distance*zero)
    # Same left-to-right multiplication order as the one-line formula.
    return time_part * angular_part * radial_part
|
||||
# Sample the unit disc on a polar grid; a complex step in np.r_ means
# "this many points, endpoints included" (50 angles x 50 radii).
theta = np.r_[0:2*np.pi:50j]
radius = np.r_[0:1:50j]

# One row per radius, one column per angle.
x = np.array([r * np.cos(theta) for r in radius])
y = np.array([r * np.sin(theta) for r in radius])
z = np.array([drumhead_height(1, 1, r, theta, 0.5) for r in radius])

import matplotlib.pyplot as plt
# Kept so that the '3d' projection is registered on older Matplotlib too.
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm

fig = plt.figure()
# Create the 3D axes through the figure. Constructing Axes3D(fig) directly
# no longer attaches the axes to the figure on recent Matplotlib releases,
# which leaves the window empty.
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(x, y, z, rstride=1, cstride=1, cmap='RdBu_r', vmin=-0.5, vmax=0.5)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
plt.show()
|
||||
12
Python/scipy/cluster_test.py
Normal file
12
Python/scipy/cluster_test.py
Normal file
@@ -0,0 +1,12 @@
|
||||
import numpy as np
|
||||
from scipy.cluster.vq import kmeans,vq,whiten
|
||||
|
||||
# Build 200 observations with three features: 100 uniform samples shifted
# by 0.5 in every feature, stacked on top of 100 unshifted uniform samples.
data = np.vstack((np.random.rand(100,3)+np.array([.5,.5,.5]),np.random.rand(100,3)))

# Rescale every feature by its standard deviation before clustering.
data = whiten(data)

# kmeans returns (codebook, distortion); only the centroids are needed here.
cent,_ = kmeans(data,3)

print(cent)

# Assign each sample to its nearest centroid. The codebook variable is
# `cent`; the former reference to `centroids` raised NameError.
clx,_ = vq(data,cent)
|
||||
37
Python/scipy/linalg_test.py
Normal file
37
Python/scipy/linalg_test.py
Normal file
@@ -0,0 +1,37 @@
|
||||
from scipy import linalg
|
||||
import numpy as np
|
||||
|
||||
# Coefficient matrix and right-hand side of the linear system a @ x = b.
a = np.array([[3, 2, 0], [1, -1, 0], [0, 5, 1]])
b = np.array([2, 4, -1])

# Determinant of the matrix, computed with NumPy and with SciPy.
print(np.linalg.det(a))
print(linalg.det(a))

# Eigenvalues and eigenvectors of the matrix.
print('eig:')
print(np.linalg.eig(a))
print(linalg.eig(a))

# Singular value decomposition. Decompose the non-square matrix `m` that is
# built for this demo (it was previously created but `a` was decomposed).
print('svd:')
m = np.array([[3,2,4],[1,3,2]])
svd_numpy = np.linalg.svd(m)
svd_scipy = linalg.svd(m)
print(svd_numpy)
print(svd_scipy)

# Solve the system via the explicit inverse: x = a^-1 @ b.
a_ = np.linalg.inv(a)
x = np.matmul(a_,b)
print(x)

# Solve the same system directly with NumPy's solver (no explicit inverse).
x = np.linalg.solve(a,b)
print(x)

# Solve the same system with SciPy's solver.
x = linalg.solve(a, b)

# Print the solution vector.
print(x)
|
||||
Reference in New Issue
Block a user