Files
notes_estom/Python/matplotlab/gallery/statistics/boxplot_demo.md
2020-09-26 22:03:11 +08:00

238 lines
7.7 KiB
Markdown
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# 箱形图
用matplotlib可视化箱形图。
以下示例展示了如何使用Matplotlib可视化箱图。有许多选项可以控制它们的外观以及用于汇总数据的统计信息。
```python
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.patches import Polygon
# Fixing random state for reproducibility
np.random.seed(19680801)
# fake up some data
spread = np.random.rand(50) * 100
center = np.ones(25) * 50
flier_high = np.random.rand(10) * 100 + 100
flier_low = np.random.rand(10) * -100
data = np.concatenate((spread, center, flier_high, flier_low))
fig, axs = plt.subplots(2, 3)
# basic plot
axs[0, 0].boxplot(data)
axs[0, 0].set_title('basic plot')
# notched plot
axs[0, 1].boxplot(data, 1)
axs[0, 1].set_title('notched plot')
# change outlier point symbols
axs[0, 2].boxplot(data, 0, 'gD')
axs[0, 2].set_title('change outlier\npoint symbols')
# don't show outlier points
axs[1, 0].boxplot(data, 0, '')
axs[1, 0].set_title("don't show\noutlier points")
# horizontal boxes
axs[1, 1].boxplot(data, 0, 'rs', 0)
axs[1, 1].set_title('horizontal boxes')
# change whisker length
axs[1, 2].boxplot(data, 0, 'rs', 0, 0.75)
axs[1, 2].set_title('change whisker length')
fig.subplots_adjust(left=0.08, right=0.98, bottom=0.05, top=0.9,
hspace=0.4, wspace=0.3)
# fake up some more data
spread = np.random.rand(50) * 100
center = np.ones(25) * 40
flier_high = np.random.rand(10) * 100 + 100
flier_low = np.random.rand(10) * -100
d2 = np.concatenate((spread, center, flier_high, flier_low))
data.shape = (-1, 1)
d2.shape = (-1, 1)
# Making a 2-D array only works if all the columns are the
# same length. If they are not, then use a list instead.
# This is actually more efficient because boxplot converts
# a 2-D array into a list of vectors internally anyway.
data = [data, d2, d2[::2, 0]]
# Multiple box plots on one Axes
fig, ax = plt.subplots()
ax.boxplot(data)
plt.show()
```
下面我们将从五个不同的概率分布生成数据,每个概率分布具有不同的特征。 我们想要了解数据的IID引导程序重采样如何保留原始样本的分布属性并且箱形图是进行此评估的一种可视化工具。
```python
numDists = 5
randomDists = ['Normal(1,1)', ' Lognormal(1,1)', 'Exp(1)', 'Gumbel(6,4)',
'Triangular(2,9,11)']
N = 500
norm = np.random.normal(1, 1, N)
logn = np.random.lognormal(1, 1, N)
expo = np.random.exponential(1, N)
gumb = np.random.gumbel(6, 4, N)
tria = np.random.triangular(2, 9, 11, N)
# Generate some random indices that we'll use to resample the original data
# arrays. For code brevity, just use the same random indices for each array
bootstrapIndices = np.random.random_integers(0, N - 1, N)
normBoot = norm[bootstrapIndices]
expoBoot = expo[bootstrapIndices]
gumbBoot = gumb[bootstrapIndices]
lognBoot = logn[bootstrapIndices]
triaBoot = tria[bootstrapIndices]
data = [norm, normBoot, logn, lognBoot, expo, expoBoot, gumb, gumbBoot,
tria, triaBoot]
fig, ax1 = plt.subplots(figsize=(10, 6))
fig.canvas.set_window_title('A Boxplot Example')
fig.subplots_adjust(left=0.075, right=0.95, top=0.9, bottom=0.25)
bp = ax1.boxplot(data, notch=0, sym='+', vert=1, whis=1.5)
plt.setp(bp['boxes'], color='black')
plt.setp(bp['whiskers'], color='black')
plt.setp(bp['fliers'], color='red', marker='+')
# Add a horizontal grid to the plot, but make it very light in color
# so we can use it for reading data values but not be distracting
ax1.yaxis.grid(True, linestyle='-', which='major', color='lightgrey',
alpha=0.5)
# Hide these grid behind plot objects
ax1.set_axisbelow(True)
ax1.set_title('Comparison of IID Bootstrap Resampling Across Five Distributions')
ax1.set_xlabel('Distribution')
ax1.set_ylabel('Value')
# Now fill the boxes with desired colors
boxColors = ['darkkhaki', 'royalblue']
numBoxes = numDists*2
medians = list(range(numBoxes))
for i in range(numBoxes):
box = bp['boxes'][i]
boxX = []
boxY = []
for j in range(5):
boxX.append(box.get_xdata()[j])
boxY.append(box.get_ydata()[j])
boxCoords = np.column_stack([boxX, boxY])
# Alternate between Dark Khaki and Royal Blue
k = i % 2
boxPolygon = Polygon(boxCoords, facecolor=boxColors[k])
ax1.add_patch(boxPolygon)
# Now draw the median lines back over what we just filled in
med = bp['medians'][i]
medianX = []
medianY = []
for j in range(2):
medianX.append(med.get_xdata()[j])
medianY.append(med.get_ydata()[j])
ax1.plot(medianX, medianY, 'k')
medians[i] = medianY[0]
# Finally, overplot the sample averages, with horizontal alignment
# in the center of each box
ax1.plot([np.average(med.get_xdata())], [np.average(data[i])],
color='w', marker='*', markeredgecolor='k')
# Set the axes ranges and axes labels
ax1.set_xlim(0.5, numBoxes + 0.5)
top = 40
bottom = -5
ax1.set_ylim(bottom, top)
ax1.set_xticklabels(np.repeat(randomDists, 2),
rotation=45, fontsize=8)
# Due to the Y-axis scale being different across samples, it can be
# hard to compare differences in medians across the samples. Add upper
# X-axis tick labels with the sample medians to aid in comparison
# (just use two decimal places of precision)
pos = np.arange(numBoxes) + 1
upperLabels = [str(np.round(s, 2)) for s in medians]
weights = ['bold', 'semibold']
for tick, label in zip(range(numBoxes), ax1.get_xticklabels()):
k = tick % 2
ax1.text(pos[tick], top - (top*0.05), upperLabels[tick],
horizontalalignment='center', size='x-small', weight=weights[k],
color=boxColors[k])
# Finally, add a basic legend
fig.text(0.80, 0.08, str(N) + ' Random Numbers',
backgroundcolor=boxColors[0], color='black', weight='roman',
size='x-small')
fig.text(0.80, 0.045, 'IID Bootstrap Resample',
backgroundcolor=boxColors[1],
color='white', weight='roman', size='x-small')
fig.text(0.80, 0.015, '*', color='white', backgroundcolor='silver',
weight='roman', size='medium')
fig.text(0.815, 0.013, ' Average Value', color='black', weight='roman',
size='x-small')
plt.show()
```
![箱形图](https://matplotlib.org/_images/sphx_glr_boxplot_demo_003.png)
在这里我们编写一个自定义函数来引导置信区间。然后我们可以使用boxplot和此函数来显示这些间隔。
```python
def fakeBootStrapper(n):
'''
This is just a placeholder for the user's method of
bootstrapping the median and its confidence intervals.
Returns an arbitrary median and confidence intervals
packed into a tuple
'''
if n == 1:
med = 0.1
CI = (-0.25, 0.25)
else:
med = 0.2
CI = (-0.35, 0.50)
return med, CI
inc = 0.1
e1 = np.random.normal(0, 1, size=(500,))
e2 = np.random.normal(0, 1, size=(500,))
e3 = np.random.normal(0, 1 + inc, size=(500,))
e4 = np.random.normal(0, 1 + 2*inc, size=(500,))
treatments = [e1, e2, e3, e4]
med1, CI1 = fakeBootStrapper(1)
med2, CI2 = fakeBootStrapper(2)
medians = [None, None, med1, med2]
conf_intervals = [None, None, CI1, CI2]
fig, ax = plt.subplots()
pos = np.array(range(len(treatments))) + 1
bp = ax.boxplot(treatments, sym='k+', positions=pos,
notch=1, bootstrap=5000,
usermedians=medians,
conf_intervals=conf_intervals)
ax.set_xlabel('treatment')
ax.set_ylabel('response')
plt.setp(bp['whiskers'], color='k', linestyle='-')
plt.setp(bp['fliers'], markersize=3.0)
plt.show()
```
![箱形图2](https://matplotlib.org/_images/sphx_glr_boxplot_demo_004.png)
## 下载这个示例
- [下载python源码: boxplot_demo.py](https://matplotlib.org/_downloads/boxplot_demo.py)
- [下载Jupyter notebook: boxplot_demo.ipynb](https://matplotlib.org/_downloads/boxplot_demo.ipynb)