# 箱形图

用matplotlib可视化箱形图。

以下示例展示了如何使用Matplotlib可视化箱形图。有许多选项可以控制它们的外观以及用于汇总数据的统计信息。

```python
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.patches import Polygon

# Fixing random state for reproducibility
np.random.seed(19680801)

# fake up some data
spread = np.random.rand(50) * 100
center = np.ones(25) * 50
flier_high = np.random.rand(10) * 100 + 100
flier_low = np.random.rand(10) * -100
data = np.concatenate((spread, center, flier_high, flier_low))

fig, axs = plt.subplots(2, 3)

# basic plot
axs[0, 0].boxplot(data)
axs[0, 0].set_title('basic plot')

# notched plot
axs[0, 1].boxplot(data, notch=True)
axs[0, 1].set_title('notched plot')

# change outlier point symbols
axs[0, 2].boxplot(data, sym='gD')
axs[0, 2].set_title('change outlier\npoint symbols')

# don't show outlier points
axs[1, 0].boxplot(data, sym='')
axs[1, 0].set_title("don't show\noutlier points")

# horizontal boxes
axs[1, 1].boxplot(data, sym='rs', vert=False)
axs[1, 1].set_title('horizontal boxes')

# change whisker length
axs[1, 2].boxplot(data, sym='rs', vert=False, whis=0.75)
axs[1, 2].set_title('change whisker length')

fig.subplots_adjust(left=0.08, right=0.98, bottom=0.05, top=0.9,
                    hspace=0.4, wspace=0.3)

# fake up some more data
spread = np.random.rand(50) * 100
center = np.ones(25) * 40
flier_high = np.random.rand(10) * 100 + 100
flier_low = np.random.rand(10) * -100
d2 = np.concatenate((spread, center, flier_high, flier_low))

# Turn both samples into single-column 2-D arrays.
data = data.reshape(-1, 1)
d2 = d2.reshape(-1, 1)

# Making a 2-D array only works if all the columns are the
# same length.  If they are not, then use a list instead.
# This is actually more efficient because boxplot converts
# a 2-D array into a list of vectors internally anyway.
data = [data, d2, d2[::2, 0]]

# Multiple box plots on one Axes
fig, ax = plt.subplots()
ax.boxplot(data)

plt.show()
```

下面我们将从五个不同的概率分布生成数据,每个概率分布具有不同的特征。
我们想要了解对数据进行IID自助法(bootstrap)重采样如何保留原始样本的分布属性,而箱形图是进行此评估的一种可视化工具。

```python
num_dists = 5
random_dists = ['Normal(1,1)', ' Lognormal(1,1)', 'Exp(1)', 'Gumbel(6,4)',
                'Triangular(2,9,11)']
N = 500

norm = np.random.normal(1, 1, N)
logn = np.random.lognormal(1, 1, N)
expo = np.random.exponential(1, N)
gumb = np.random.gumbel(6, 4, N)
tria = np.random.triangular(2, 9, 11, N)

# Generate some random indices that we'll use to resample the original data
# arrays. For code brevity, just use the same random indices for each array.
# np.random.random_integers is deprecated; randint's upper bound is exclusive,
# so randint(0, N) draws from the same closed range [0, N - 1].
bootstrap_indices = np.random.randint(0, N, N)
norm_boot = norm[bootstrap_indices]
expo_boot = expo[bootstrap_indices]
gumb_boot = gumb[bootstrap_indices]
logn_boot = logn[bootstrap_indices]
tria_boot = tria[bootstrap_indices]

data = [norm, norm_boot, logn, logn_boot, expo, expo_boot, gumb, gumb_boot,
        tria, tria_boot]

fig, ax1 = plt.subplots(figsize=(10, 6))
# The window title is set through the canvas manager; the old
# fig.canvas.set_window_title shortcut was deprecated in Matplotlib 3.4.
fig.canvas.manager.set_window_title('A Boxplot Example')
fig.subplots_adjust(left=0.075, right=0.95, top=0.9, bottom=0.25)

bp = ax1.boxplot(data, notch=False, sym='+', vert=True, whis=1.5)
plt.setp(bp['boxes'], color='black')
plt.setp(bp['whiskers'], color='black')
plt.setp(bp['fliers'], color='red', marker='+')

# Add a horizontal grid to the plot, but make it very light in color
# so we can use it for reading data values but not be distracting
ax1.yaxis.grid(True, linestyle='-', which='major', color='lightgrey',
               alpha=0.5)

# Hide the grid behind plot objects
ax1.set_axisbelow(True)
ax1.set_title('Comparison of IID Bootstrap Resampling Across Five Distributions')
ax1.set_xlabel('Distribution')
ax1.set_ylabel('Value')

# Now fill the boxes with desired colors
box_colors = ['darkkhaki', 'royalblue']
num_boxes = num_dists * 2
medians = []
for i in range(num_boxes):
    box = bp['boxes'][i]
    # Use the first five vertices of the box outline as the polygon to fill.
    box_coords = np.column_stack([box.get_xdata()[:5], box.get_ydata()[:5]])
    # Alternate between Dark Khaki and Royal Blue
    ax1.add_patch(Polygon(box_coords, facecolor=box_colors[i % 2]))

    # Now draw the median lines back over what we just filled in
    med = bp['medians'][i]
    median_x = med.get_xdata()[:2]
    median_y = med.get_ydata()[:2]
    ax1.plot(median_x, median_y, 'k')
    medians.append(median_y[0])

    # Finally, overplot the sample averages, with horizontal alignment
    # in the center of each box
    ax1.plot([np.average(med.get_xdata())], [np.average(data[i])],
             color='w', marker='*', markeredgecolor='k')

# Set the axes ranges and axes labels
ax1.set_xlim(0.5, num_boxes + 0.5)
top = 40
bottom = -5
ax1.set_ylim(bottom, top)
ax1.set_xticklabels(np.repeat(random_dists, 2),
                    rotation=45, fontsize=8)

# Due to the Y-axis scale being different across samples, it can be
# hard to compare differences in medians across the samples. Add upper
# X-axis tick labels with the sample medians to aid in comparison
# (just use two decimal places of precision)
pos = np.arange(num_boxes) + 1
upper_labels = [str(np.round(s, 2)) for s in medians]
weights = ['bold', 'semibold']
for tick, (x, label) in enumerate(zip(pos, upper_labels)):
    k = tick % 2
    ax1.text(x, top - (top * 0.05), label,
             horizontalalignment='center', size='x-small',
             weight=weights[k], color=box_colors[k])

# Finally, add a basic legend
fig.text(0.80, 0.08, f'{N} Random Numbers',
         backgroundcolor=box_colors[0], color='black', weight='roman',
         size='x-small')
fig.text(0.80, 0.045, 'IID Bootstrap Resample',
         backgroundcolor=box_colors[1],
         color='white', weight='roman', size='x-small')
fig.text(0.80, 0.015, '*', color='white', backgroundcolor='silver',
         weight='roman', size='medium')
fig.text(0.815, 0.013, ' Average Value', color='black', weight='roman',
         size='x-small')

plt.show()
```

![箱形图](https://matplotlib.org/_images/sphx_glr_boxplot_demo_003.png)
在这里,我们编写一个自定义函数来计算自助法(bootstrap)置信区间。然后我们可以使用boxplot和此函数来显示这些区间。

```python
def fakeBootStrapper(n):
    """
    Stand in for the user's method of bootstrapping the median.

    This is just a placeholder: it returns an arbitrary median and an
    arbitrary confidence interval, packed into a tuple ``(med, CI)``.
    The values returned for ``n == 1`` differ from those for any other ``n``.
    """
    if n == 1:
        med = 0.1
        CI = (-0.25, 0.25)
    else:
        med = 0.2
        CI = (-0.35, 0.50)
    return med, CI


inc = 0.1
e1 = np.random.normal(0, 1, size=(500,))
e2 = np.random.normal(0, 1, size=(500,))
e3 = np.random.normal(0, 1 + inc, size=(500,))
e4 = np.random.normal(0, 1 + 2*inc, size=(500,))

treatments = [e1, e2, e3, e4]
med1, CI1 = fakeBootStrapper(1)
med2, CI2 = fakeBootStrapper(2)
# None entries supply no user value for the first two boxes; only the
# last two boxes get a user-provided median and confidence interval.
medians = [None, None, med1, med2]
conf_intervals = [None, None, CI1, CI2]

fig, ax = plt.subplots()
pos = np.arange(len(treatments)) + 1
bp = ax.boxplot(treatments, sym='k+', positions=pos,
                notch=True, bootstrap=5000,
                usermedians=medians,
                conf_intervals=conf_intervals)

ax.set_xlabel('treatment')
ax.set_ylabel('response')
plt.setp(bp['whiskers'], color='k', linestyle='-')
plt.setp(bp['fliers'], markersize=3.0)
plt.show()
```

![箱形图2](https://matplotlib.org/_images/sphx_glr_boxplot_demo_004.png)

## 下载这个示例

- [下载python源码: boxplot_demo.py](https://matplotlib.org/_downloads/boxplot_demo.py)
- [下载Jupyter notebook: boxplot_demo.ipynb](https://matplotlib.org/_downloads/boxplot_demo.ipynb)