build

2026-02-10 06:05:35 +08:00 · 2023-03-23 18:56:56 +08:00
parent 2715ce703a
commit 0dfdcf0bab
25 changed files with 118 additions and 115 deletions
--- a/chapter_sorting/bubble_sort.md
+++ b/chapter_sorting/bubble_sort.md
@@ -87,7 +87,7 @@ comments: true
 === "Python"

    ```python title="bubble_sort.py"
-    def bubble_sort(nums: List[int]) -> None:
+    def bubble_sort(nums: list[int]) -> None:
        """ 冒泡排序 """
        n: int = len(nums)
        # 外循环：待排序元素数量为 n-1, n-2, ..., 1
@@ -295,7 +295,7 @@ comments: true
 === "Python"

    ```python title="bubble_sort.py"
-    def bubble_sort_with_flag(nums: List[int]) -> None:
+    def bubble_sort_with_flag(nums: list[int]) -> None:
        """ 冒泡排序（标志优化） """
        n: int = len(nums)
        # 外循环：待排序元素数量为 n-1, n-2, ..., 1
--- a/chapter_sorting/bucket_sort.md
+++ b/chapter_sorting/bucket_sort.md
@@ -1,16 +1,20 @@
-## 拓展到桶排序
+# 桶排序

-如果我们把上述 `bucket` 中的每个索引想象成一个桶，那么可以将计数排序理解为把 $n$ 个元素分配到对应的桶中，再根据桶与桶之间天然的有序性来实现排序。
+「桶排序 Bucket Sort」考虑设置 $k$ 个桶，并将 $n$ 个元素根据大小分配到 $k$ 个桶中，**并在每个桶内部分别执行排序**，由于桶之间的大小关系是确定的，因此最后按照桶之间的顺序将元素依次展开即可。

-以上解读便是「桶排序 Bucket Sort」的核心思想。具体地，桶排序考虑将 $n$ 个元素根据大小范围均匀地分配到 $k$ 个桶中，由于桶之间是有序的，**因此仅需在每个桶内部执行排序**，最终按照桶之间的大小关系将元素依次排列，即可得到排序结果。
+假设元素平均分布在各个桶内，则每个桶内元素数量为 $\frac{n}{k}$ ；如果使用「快速排序」来实现桶内排序，则排序单个桶使用 $O(\frac{n}{k} \log\frac{n}{k})$ 时间，排序所有桶使用 $O(n \log\frac{n}{k})$ 时间。**当桶数量 $k$ 比较大时，时间复杂度则趋向于 $O(n)$** 。

-假设使用「快速排序」来排序各个桶内的元素，每个桶内元素数量为 $\frac{n}{k}$ ，则排序单个桶使用 $O(\frac{n}{k} \log\frac{n}{k})$ 时间，排序所有桶使用 $O(n \log\frac{n}{k})$ 时间。**当桶数量 $k$ 接近 $n$ 时，时间复杂度则趋向于 $O(n)$** 。
+!!! note "计数排序与桶排序的关系"
+
+    **计数排序可以看作是桶排序的一种特例**。我们可以把计数排序中 `counter` 的每个索引想象成一个桶，将统计数量的过程想象成把 $n$ 个元素分配到对应的桶中，再根据桶之间的有序性输出结果，从而实现排序。

 （图）

-理论上桶排序的时间复杂度是 $O(n)$ ，**但前提是需要将元素均匀分配到各个桶中**，而这是不太容易做到的。假设我们要把淘宝中的 $100$ 万件商品根据价格范围平均分配到 $100$ 个桶中，由于商品价格不是均匀分布的，比如 $1$ ~ $100$ 元的商品非常多、$1$ 万元以上的商品非常少等，因此难以简单地设定各个桶的价格分界线。解决方案有：
+理论上桶排序的时间复杂度是 $O(n)$ ，**但前提是需要将元素均匀分配到各个桶中**，而这并不容易做到。假设想要把淘宝的 $100$ 万件商品根据价格范围平均分配到 $100$ 个桶中，而商品价格不是均匀分布的，例如 $100$ 元以下的商品非常多、$1000$ 元以上的商品非常少等。如果我们将价格区间平均划分为 $100$ 份，那么各个桶内的商品数量差距会非常大。为了实现平均分配，我们一般这样做：

- 先初步设置一个分界线，将元素分配完后，**把元素较多的桶继续划分为多个桶**，直至每个桶内元素数量合理为止；该做法一般使用递归实现；
- 如果我们提前知道商品价格的概率分布，**则可以根据已知分布来设置每个桶的价格分界线**；值得说明的是，数据分布不一定需要 case-by-case 地统计，有时可以采用一些常见分布来近似，例如自然界的正态分布；
+- 先粗略设置分界线，将元素分配完后，**把元素较多的桶继续划分为多个桶**，直至所有桶内元素数量合理为止；该做法本质上是生成一棵递归树；
+- 如果我们提前知道商品价格的概率分布，**则可以根据已知分布来设置每个桶的价格分界线**；值得说明的是，数据分布不一定需要特意统计，也可以根据数据特点采用某种常见概率模型来近似，例如自然界的正态分布等；

-（图）
+（图）
+
+另外，排序桶内元素需要选择一种合适的排序算法，比如快速排序。
--- a/chapter_sorting/counting_sort.md
+++ b/chapter_sorting/counting_sort.md
@@ -77,7 +77,7 @@ comments: true
 === "Python"

    ```python title="counting_sort.py"
-    def counting_sort_naive(nums: List[int]) -> None:
+    def counting_sort_naive(nums: list[int]) -> None:
        """ 计数排序 """
        # 简单实现，无法用于排序对象
        # 1. 统计数组最大元素 m
@@ -301,7 +301,7 @@ $$
 === "Python"

    ```python title="counting_sort.py"
-    def counting_sort(nums: List[int]) -> None:
+    def counting_sort(nums: list[int]) -> None:
        """ 计数排序 """
        # 完整实现，可排序对象，并且是稳定排序
        # 1. 统计数组最大元素 m
--- a/chapter_sorting/insertion_sort.md
+++ b/chapter_sorting/insertion_sort.md
@@ -63,7 +63,7 @@ comments: true
 === "Python"

    ```python title="insertion_sort.py"
-    def insertion_sort(nums: List[int]) -> None:
+    def insertion_sort(nums: list[int]) -> None:
        """ 插入排序 """
        # 外循环：base = nums[1], nums[2], ..., nums[n-1]   
        for i in range(1, len(nums)):
--- a/chapter_sorting/merge_sort.md
+++ b/chapter_sorting/merge_sort.md
@@ -146,12 +146,12 @@ comments: true
 === "Python"

    ```python title="merge_sort.py"
-    def merge(nums: List[int], left: int, mid: int, right: int) -> None:
+    def merge(nums: list[int], left: int, mid: int, right: int) -> None:
        """ 合并左子数组和右子数组 """
        # 左子数组区间 [left, mid]
        # 右子数组区间 [mid + 1, right]
-        # 初始化辅助数组 借助 copy模块
-        tmp: List[int] = nums[left:right + 1].copy()
+        # 初始化辅助数组
+        tmp: list[int] = list(nums[left:right + 1])
        # 左子数组的起始索引和结束索引
        left_start: int = 0
        left_end: int = mid - left
@@ -176,7 +176,7 @@ comments: true
                nums[k] = tmp[j]
                j += 1

-    def merge_sort(nums: List[int], left: int, right: int) -> None:
+    def merge_sort(nums: list[int], left: int, right: int) -> None:
        """ 归并排序 """
        # 终止条件
        if left >= right:
--- a/chapter_sorting/quick_sort.md
+++ b/chapter_sorting/quick_sort.md
@@ -100,7 +100,7 @@ comments: true
 === "Python"

    ```python title="quick_sort.py"
-    def partition(self, nums: List[int], left: int, right: int) -> int:
+    def partition(self, nums: list[int], left: int, right: int) -> int:
        """ 哨兵划分 """
        # 以 nums[left] 作为基准数
        i, j = left, right
@@ -343,7 +343,7 @@ comments: true
 === "Python"

    ```python title="quick_sort.py"
-    def quick_sort(self, nums: List[int], left: int, right: int) -> None:
+    def quick_sort(self, nums: list[int], left: int, right: int) -> None:
        """ 快速排序 """
        # 子数组长度为 1 时终止递归
        if left >= right:
@@ -562,7 +562,7 @@ comments: true
 === "Python"

    ```python title="quick_sort.py"
-    def median_three(self, nums: List[int], left: int, mid: int, right: int) -> int:
+    def median_three(self, nums: list[int], left: int, mid: int, right: int) -> int:
        """ 选取三个元素的中位数 """
        # 此处使用异或运算来简化代码
        # 异或规则为 0 ^ 0 = 1 ^ 1 = 0, 0 ^ 1 = 1 ^ 0 = 1
@@ -572,7 +572,7 @@ comments: true
            return mid
        return right

-    def partition(self, nums: List[int], left: int, right: int) -> int:
+    def partition(self, nums: list[int], left: int, right: int) -> int:
        """ 哨兵划分（三数取中值） """
        # 以 nums[left] 作为基准数
        med: int = self.median_three(nums, left, (left + right) // 2, right)
@@ -855,7 +855,7 @@ comments: true
 === "Python"

    ```python title="quick_sort.py"
-    def quick_sort(self, nums: List[int], left: int, right: int) -> None:
+    def quick_sort(self, nums: list[int], left: int, right: int) -> None:
        """ 快速排序（尾递归优化） """
        # 子数组长度为 1 时终止
        while left < right: