mirror of
https://github.com/Estom/notes.git
synced 2026-04-04 19:38:33 +08:00
matplotlib & pandas
This commit is contained in:
238
Python/matplotlab/gallery/statistics/boxplot_demo.md
Normal file
238
Python/matplotlab/gallery/statistics/boxplot_demo.md
Normal file
@@ -0,0 +1,238 @@
|
||||
# 箱形图
|
||||
|
||||
用matplotlib可视化箱形图。
|
||||
|
||||
以下示例展示了如何使用Matplotlib可视化箱形图。有许多选项可以控制箱形图的外观,以及用于汇总数据的统计量。
|
||||
|
||||
```python
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from matplotlib.patches import Polygon
|
||||
|
||||
|
||||
# Fixing random state for reproducibility
np.random.seed(19680801)

# Fake up some data: a uniform spread, a cluster of identical central
# values, and two groups of outliers above and below the spread.
spread = np.random.rand(50) * 100
center = np.ones(25) * 50
flier_high = np.random.rand(10) * 100 + 100
flier_low = np.random.rand(10) * -100
data = np.concatenate((spread, center, flier_high, flier_low))

fig, axs = plt.subplots(2, 3)

# NOTE: every option after the data is passed by keyword.  The original
# positional form (e.g. ``boxplot(data, 0, 'rs', 0, 0.75)``) hid which
# option each value controlled and is not accepted positionally by newer
# Matplotlib releases.  ``vert=False`` is kept for compatibility with
# older releases; NOTE(review): recent Matplotlib prefers
# ``orientation='horizontal'`` -- confirm against the installed version.

# basic plot
axs[0, 0].boxplot(data)
axs[0, 0].set_title('basic plot')

# notched plot
axs[0, 1].boxplot(data, notch=True)
axs[0, 1].set_title('notched plot')

# change outlier point symbols
axs[0, 2].boxplot(data, notch=False, sym='gD')
axs[0, 2].set_title('change outlier\npoint symbols')

# don't show outlier points
axs[1, 0].boxplot(data, notch=False, sym='')
axs[1, 0].set_title("don't show\noutlier points")

# horizontal boxes
axs[1, 1].boxplot(data, notch=False, sym='rs', vert=False)
axs[1, 1].set_title('horizontal boxes')

# change whisker length
axs[1, 2].boxplot(data, notch=False, sym='rs', vert=False, whis=0.75)
axs[1, 2].set_title('change whisker length')

fig.subplots_adjust(left=0.08, right=0.98, bottom=0.05, top=0.9,
                    hspace=0.4, wspace=0.3)

# fake up some more data
spread = np.random.rand(50) * 100
center = np.ones(25) * 40
flier_high = np.random.rand(10) * 100 + 100
flier_low = np.random.rand(10) * -100
d2 = np.concatenate((spread, center, flier_high, flier_low))

# Turn both samples into single-column 2-D arrays.  ``reshape`` is used
# instead of assigning to ``.shape`` because NumPy discourages setting
# the shape attribute in place.
data = data.reshape(-1, 1)
d2 = d2.reshape(-1, 1)

# Making a 2-D array only works if all the columns are the
# same length.  If they are not, then use a list instead.
# This is actually more efficient because boxplot converts
# a 2-D array into a list of vectors internally anyway.
data = [data, d2, d2[::2, 0]]

# Multiple box plots on one Axes
fig, ax = plt.subplots()
ax.boxplot(data)

plt.show()
|
||||
```
|
||||
|
||||
下面我们将从五个不同的概率分布生成数据,每个概率分布具有不同的特征。我们想要了解对数据进行独立同分布(IID)自助法(bootstrap)重采样后,原始样本的分布特性能在多大程度上得以保留,而箱形图正是进行这种评估的一种可视化工具。
|
||||
|
||||
```python
|
||||
numDists = 5
randomDists = ['Normal(1,1)', ' Lognormal(1,1)', 'Exp(1)', 'Gumbel(6,4)',
               'Triangular(2,9,11)']
N = 500

norm = np.random.normal(1, 1, N)
logn = np.random.lognormal(1, 1, N)
expo = np.random.exponential(1, N)
gumb = np.random.gumbel(6, 4, N)
tria = np.random.triangular(2, 9, 11, N)

# Generate some random indices that we'll use to resample the original data
# arrays. For code brevity, just use the same random indices for each array.
# ``randint`` has an exclusive upper bound, so ``randint(0, N, N)`` draws
# from 0..N-1 exactly like the deprecated ``random_integers(0, N - 1, N)``.
bootstrapIndices = np.random.randint(0, N, N)
normBoot = norm[bootstrapIndices]
expoBoot = expo[bootstrapIndices]
gumbBoot = gumb[bootstrapIndices]
lognBoot = logn[bootstrapIndices]
triaBoot = tria[bootstrapIndices]

# Each original sample is immediately followed by its resample, so boxes
# at even indices are originals and boxes at odd indices are resamples.
data = [norm, normBoot, logn, lognBoot, expo, expoBoot, gumb, gumbBoot,
        tria, triaBoot]

fig, ax1 = plt.subplots(figsize=(10, 6))
# The window title is set through the figure manager; the canvas-level
# ``set_window_title`` is no longer available in current Matplotlib.
fig.canvas.manager.set_window_title('A Boxplot Example')
fig.subplots_adjust(left=0.075, right=0.95, top=0.9, bottom=0.25)

bp = ax1.boxplot(data, notch=False, sym='+', vert=True, whis=1.5)
plt.setp(bp['boxes'], color='black')
plt.setp(bp['whiskers'], color='black')
plt.setp(bp['fliers'], color='red', marker='+')

# Add a horizontal grid to the plot, but make it very light in color
# so we can use it for reading data values but not be distracting
ax1.yaxis.grid(True, linestyle='-', which='major', color='lightgrey',
               alpha=0.5)

# Hide these grid behind plot objects
ax1.set_axisbelow(True)
ax1.set_title('Comparison of IID Bootstrap Resampling Across Five Distributions')
ax1.set_xlabel('Distribution')
ax1.set_ylabel('Value')

# Now fill the boxes with desired colors
boxColors = ['darkkhaki', 'royalblue']
numBoxes = numDists * 2
medians = []
for i, (box, med, sample) in enumerate(zip(bp['boxes'], bp['medians'], data)):
    # Use the first five vertices of the box outline as the polygon to fill.
    boxCoords = np.column_stack([box.get_xdata()[:5], box.get_ydata()[:5]])
    # Alternate between Dark Khaki and Royal Blue
    ax1.add_patch(Polygon(boxCoords, facecolor=boxColors[i % 2]))

    # Now draw the median lines back over what we just filled in
    medianX = med.get_xdata()[:2]
    medianY = med.get_ydata()[:2]
    ax1.plot(medianX, medianY, 'k')
    medians.append(medianY[0])

    # Finally, overplot the sample averages, with horizontal alignment
    # in the center of each box
    ax1.plot([np.average(med.get_xdata())], [np.average(sample)],
             color='w', marker='*', markeredgecolor='k')

# Set the axes ranges and axes labels
ax1.set_xlim(0.5, numBoxes + 0.5)
top = 40
bottom = -5
ax1.set_ylim(bottom, top)
# Every distribution name is repeated so that the original and its
# resample share the same tick label.
ax1.set_xticklabels(np.repeat(randomDists, 2),
                    rotation=45, fontsize=8)

# Due to the Y-axis scale being different across samples, it can be
# hard to compare differences in medians across the samples. Add upper
# X-axis tick labels with the sample medians to aid in comparison
# (just use two decimal places of precision)
pos = np.arange(numBoxes) + 1
upperLabels = [str(np.round(s, 2)) for s in medians]
weights = ['bold', 'semibold']
for tick, upperLabel in enumerate(upperLabels):
    k = tick % 2
    ax1.text(pos[tick], top - (top * 0.05), upperLabel,
             horizontalalignment='center', size='x-small', weight=weights[k],
             color=boxColors[k])

# Finally, add a basic legend
fig.text(0.80, 0.08, str(N) + ' Random Numbers',
         backgroundcolor=boxColors[0], color='black', weight='roman',
         size='x-small')
fig.text(0.80, 0.045, 'IID Bootstrap Resample',
         backgroundcolor=boxColors[1],
         color='white', weight='roman', size='x-small')
fig.text(0.80, 0.015, '*', color='white', backgroundcolor='silver',
         weight='roman', size='medium')
fig.text(0.815, 0.013, ' Average Value', color='black', weight='roman',
         size='x-small')

plt.show()
|
||||
```
|
||||
|
||||

|
||||
|
||||
在这里,我们编写一个自定义函数,用自助法(bootstrap)来估计置信区间。然后我们可以将boxplot与此函数结合使用,来显示这些置信区间。
|
||||
|
||||
```python
|
||||
def fakeBootStrapper(n):
    """
    Return a canned median and confidence interval.

    This is just a placeholder for the user's method of bootstrapping
    the median and its confidence intervals; no resampling is performed.

    Parameters
    ----------
    n
        Selector for the canned result: a value equal to ``1`` picks the
        first set of values, anything else picks the second.

    Returns
    -------
    tuple
        ``(med, CI)``: an arbitrary median and a two-element tuple of
        arbitrary confidence limits.
    """
    if n == 1:
        med = 0.1
        CI = (-0.25, 0.25)
    else:
        med = 0.2
        CI = (-0.35, 0.50)

    return med, CI
|
||||
|
||||
# Four zero-mean normal samples; the last two get a progressively larger
# standard deviation (1 + inc and 1 + 2*inc).
inc = 0.1
e1 = np.random.normal(0, 1, size=(500,))
e2 = np.random.normal(0, 1, size=(500,))
e3 = np.random.normal(0, 1 + inc, size=(500,))
e4 = np.random.normal(0, 1 + 2*inc, size=(500,))

treatments = [e1, e2, e3, e4]
med1, CI1 = fakeBootStrapper(1)
med2, CI2 = fakeBootStrapper(2)
# Only the last two treatments get user-supplied values.  Per the
# Axes.boxplot documentation, a ``None`` entry leaves that box's median /
# confidence interval to Matplotlib's own computation.
medians = [None, None, med1, med2]
conf_intervals = [None, None, CI1, CI2]

fig, ax = plt.subplots()
# Boxes are placed at x = 1, 2, ..., len(treatments).
pos = np.arange(len(treatments)) + 1
bp = ax.boxplot(treatments, sym='k+', positions=pos,
                notch=True, bootstrap=5000,
                usermedians=medians,
                conf_intervals=conf_intervals)

ax.set_xlabel('treatment')
ax.set_ylabel('response')
plt.setp(bp['whiskers'], color='k', linestyle='-')
plt.setp(bp['fliers'], markersize=3.0)
plt.show()
|
||||
```
|
||||
|
||||

|
||||
|
||||
## 下载这个示例
|
||||
|
||||
- [下载python源码: boxplot_demo.py](https://matplotlib.org/_downloads/boxplot_demo.py)
|
||||
- [下载Jupyter notebook: boxplot_demo.ipynb](https://matplotlib.org/_downloads/boxplot_demo.ipynb)
|
||||
Reference in New Issue
Block a user