From 0ea635e8f5c6c983034ac1e73c010001c1bdf657 Mon Sep 17 00:00:00 2001 From: Shine wOng <1551885@tongji.edu.cn> Date: Tue, 15 Oct 2019 16:27:31 +0800 Subject: [PATCH] some modifications in thu_dsa/chp1, adding two functions in lcs/, updating exercises.md. --- thu_dsa/chp1/exercises.md | 136 ++++++++++++++++++ thu_dsa/chp1/lcs/lcs.cpp | 103 +++++++++++-- thu_dsa/chp1/lcs/lcs.h | 7 +- thu_dsa/chp1/lcs/test_lcs.cpp | 77 +++++++--- .../{master theorem.md => master_theorem.md} | 0 words.md | 2 +- 6 files changed, 293 insertions(+), 32 deletions(-) rename thu_dsa/chp1/{master theorem.md => master_theorem.md} (100%) diff --git a/thu_dsa/chp1/exercises.md b/thu_dsa/chp1/exercises.md index 5fb9e9e..162d095 100644 --- a/thu_dsa/chp1/exercises.md +++ b/thu_dsa/chp1/exercises.md @@ -35,3 +35,139 @@ (<, 1; 0, R, >); (>, #; #, R, h);//halt ``` + +## Introduction.E + +> 做递归跟踪分析时,为什么递归调用语句本身可不统计? + +因为递归语句本身的执行时间,是计入了对应的子实例当中,对于当前实例而言,只需要考虑函数调用的跳转执行,该执行的执行时间是`O(1)`。 + +> 试用递归跟踪法,分析`fib()`二分递归版的复杂度。通过递归跟踪,解释该版本复杂度过高的原因 + +可以针对`fib(n)`画出递归跟踪图,如下: + +``` + fib(n) + / \ + n-1 n-2 + / \ / \ + n-2 n-3 n-3 n-4 + / + .... +``` + +可以看到,该递归跟踪树的高度为`h = n - 1`,并且其中最高的满二叉树子树高度为`n / 2`,因此二分递归版本的复杂度下界为 + +$$ +1 + 2 + 4 + \cdots + 2^{\frac{n}{2}} = \Omega(2^{\frac{n}{2}}) = \Omega(\sqrt{2}^n) +$$ + +而复杂度上界为 + +$$ +1 + 2 + 4 + \cdots + 2^{n - 1} = O(2^n) +$$ + +从上述递归跟踪图中也可以看到,二分递归版本复杂度过高的原因是其中具有大量重复计算的值。一般地,设`fib(k)`的出现次数为`nfib(k)`,则有每一个`fib(k+1)`和`fib(k+2)`都会产生一个`fib(k)`,因此 + +```c +nfib(k) = nfib(k+1) + nfib(k+2), 1 <= k <= n - 2 +``` + +并且有 + +```c +nfib(n) = 1, nfib(n - 1) = 1 +``` + +因此, + +```c +nfib(k) = fib(n - k + 1), 1 <= k <= n +``` + +> 递归算法的空间复杂度,主要取决于什么因素? + +递归深度。 + +> 本节数组求和问题的两个(线性和二分)递归算法时间复杂度相同,空间呢? 
+ +每一次递归子问题的空间复杂度都是`O(1)`。 +对于线性递归算法,递归深度为`O(n)`,因此空间复杂度为`O(n)`。 +而二分递归算法,递归深度为`O(logn)`,因此空间复杂度为`O(logn)`。 + +> 自学递推式的一般求解性方法及规律`Master Theorem`。 + +看这篇总结[master_theorem](master_theorem.md) + +## Introduction.F + +> 本节所介绍的迭代式`LCS`算法,似乎需要记录每个子问题的局部解,从而导致空间复杂度激增。实际上,这既不现实,亦无必要。试改进该算法,使得每个子问题只需要常数空间,即可保证最终得到`LCS`的组成(而非仅仅长度) + +这里说的`LCS`算法,应该是返回`LCS`的组成的,并没有在邓公的课件、教材、网课上找到,不过可以从返回`LCS`长度的迭代算法中推广得到。 + +在`LCS`长度的迭代式算法中,需要维护一个`m*n`的向量,来保存各个子问题的`LCS`长度,仿照其思路,在上述`m*n`的向量中,保存各个子问题的`LCS`序列,即可构造出一种`LCS`组成的算法。容易看出,由于每个子问题都需要保存当前的`LCS`,子问题的空间复杂度为`O(min(m, n))`,因此整体的空间复杂度为`O(m*n*min(m, n))`,的确增加了不少,题目就是要求对这种情况进行改进。 + +可以注意到,上述算法空间复杂度激增的原因,是保存了大量重复的内容。实际上,构成最终`LCS`序列的字符,只在`O(min(m, n))`个位置出现。因此可以仿照图剪枝的策略,在填充向量时动态地记录当前的移动方向,即是从对角线更新,还是从左侧元素更新,还是从上侧元素更新。遍历完成后,再沿着前面标记的方向进行一次反向的遍历,在该过程中记录`LCS`各个字符,从而可以得到整体的`LCS`的组成。该反向遍历至多只会进行`O(m + n)`次,对整体的时间复杂度`O(m*n)`没有显著影响,同时每个子问题的空间复杂度都下降到`O(1)`。该算法的代码如下: + +```cpp +string lcsIt(string one, string two, int len1, int len2){ + string lcs; + if (len1 == 0 || len2 == 0) return lcs; + + vector<vector<State>> states(len1 + 1, vector<State>(len2 + 1)); + for(int i = 0; i != len1; ++i){ + for(int j = 0; j != len2; ++j){ + if(one[i] == two[j]){ + states[i + 1][j + 1].len = states[i][j].len + 1; + states[i + 1][j + 1].dir = DIAGON; + } + else{ + if(states[i][j + 1].len < states[i + 1][j].len){ + states[i + 1][j + 1].len = states[i + 1][j].len; + states[i + 1][j + 1].dir = LEFT; + }else{ + states[i + 1][j + 1].len = states[i][j + 1].len; + states[i + 1][j + 1].dir = UPPER; + } + } + } + } + + lcs.resize(states[len1][len2].len); + int pos = lcs.size(); + for(int i = len1, j = len2; i > 0 && j > 0; ){ + switch(states[i][j].dir){ + case DIAGON: + lcs[--pos] = one[i - 1]; + --i, --j; + break; + case UPPER: + --i; + break; + case LEFT: + --j; + break; + default: + exit(-1); + } + } + return lcs; +} +``` + +需要指出的是,对于多个`LCS`的情形,该算法只会返回其中的一个解,因为在算法中对于两个子问题的`LCS`长度相同的情况,是优先选择上面`UPPER`的子问题。 + +> 考查序列`A = "immaculate"`和`B = 
"computer"`。1)它们的`LCS`是什么;2)这里的解是否唯一?是否有歧义性?3)按照本节所给的算法,找出的是哪一个解? + +1)`"mute"`和`"cute"` +2)所以显然不唯一,有歧义性。 +3)按照上面给的算法,优先从上方进行更新,即优先选择序列`A`更靠前的字符,即找出的是`"mute"`。 + +> 实现`LCS`算法的递归版和迭代版,并通过实测比较运行时间。 + +代码和测试分别放在[lcs.cpp](lcs/lcs.cpp)和[test_lcs.cpp](lcs/test_lcs.cpp)了。递归版的确很慢...... + +> 采用`memoization`策略,改进`fib()`和`LCS()`的递归版 + +不想写了...... diff --git a/thu_dsa/chp1/lcs/lcs.cpp b/thu_dsa/chp1/lcs/lcs.cpp index 826ab95..0d598dc 100644 --- a/thu_dsa/chp1/lcs/lcs.cpp +++ b/thu_dsa/chp1/lcs/lcs.cpp @@ -1,22 +1,22 @@ #include "lcs.h" +#include -int lcsRe(string one, string two){ +int lcslenRe(string one, string two, int len1, int len2){ int len; - if(one.empty() || two.empty()) return 0; + if(len1 == 0 || len2 == 0) return 0; - int len1 = one.size(), len2 = two.size(); if(one[len1 - 1] == two[len2 - 1]){ - len = lcsRe(one.substr(0, len1 - 1), two.substr(0, len2 - 1)) + 1; + len = lcslenRe(one.substr(0, len1 - 1), two.substr(0, len2 - 1), len1 - 1, len2 - 1) + 1; return len; } - len = MAX(lcsRe(one.substr(0, len1 - 1), two), lcsRe(one, two.substr(0, len2 - 1))); - return len; + int lenone = lcslenRe(one.substr(0, len1 - 1), two, len1 - 1, len2); + int lentwo = lcslenRe(one, two.substr(0, len2 - 1), len1, len2 - 1); + return lenone > lentwo ? 
lenone : lentwo; } -int lcsIt(string one, string two){ +int lcslenIt(string one, string two, int len1, int len2){ if(one.empty() || two.empty()) return 0; - int len1 = one.size(), len2 = two.size(); vector> lens(len1 + 1, vector(len2 + 1, 0)); for(int ix = 0; ix != len1; ++ix){ for(int jx = 0; jx != len2; ++jx){ @@ -29,3 +29,90 @@ int lcsIt(string one, string two){ return lens[len1][len2]; } +vector lcsRe(string one, string two, int len1, int len2){ + vector lcs; + if (len1 == 0 || len2 == 0) return lcs; + + vector lcs1, lcs2; + if(one[len1 - 1] == two[len2 - 1]){ + lcs = lcsRe(one.substr(0, len1 - 1), two.substr(0, len2 - 1), len1 - 1, len2 - 1); + if (lcs.empty()) lcs.push_back(one.substr(len1 - 1, 1)); + else + for (auto It = lcs.begin(); It != lcs.end(); ++It) + It->append(1, one[len1 - 1]); + return lcs; + } + lcs1 = lcsRe(one.substr(0, len1 - 1), two, len1 - 1, len2); + lcs2 = lcsRe(one, two.substr(0, len2 - 1), len1, len2 - 1); + if (lcs1.empty()) lcs = lcs2; + else if (lcs2.empty()) lcs = lcs1; + else{ + if (lcs1[0].length() < lcs2[0].length()) lcs = lcs2; + else if (lcs1[0].length() > lcs2[0].length()) lcs = lcs1; + else { + lcs = lcs1; + for (string entry : lcs2) + if(find(lcs.cbegin(), lcs.cend(), entry) == lcs.cend()) + lcs.push_back(entry); + } + } + return lcs; +} + +enum direction { LEFT, UPPER, DIAGON, UNINIT }; + +class State{ +public: + int len; + direction dir; + State(){ + len = 0; + dir = UNINIT; + } +}; + +string lcsIt(string one, string two, int len1, int len2){ + string lcs; + if (len1 == 0 || len2 == 0) return lcs; + + vector> states(len1 + 1, vector(len2 + 1)); + for(int i = 0; i != len1; ++i){ + for(int j = 0; j != len2; ++j){ + if(one[i] == two[j]){ + states[i + 1][j + 1].len = states[i][j].len + 1; + states[i + 1][j + 1].dir = DIAGON; + } + else{ + if(states[i][j + 1].len < states[i + 1][j].len){ + states[i + 1][j + 1].len = states[i + 1][j].len; + states[i + 1][j + 1].dir = LEFT; + }else{ + states[i + 1][j + 1].len = states[i][j + 
1].len; + states[i + 1][j + 1].dir = UPPER; + } + } + } + } + + lcs.resize(states[len1][len2].len); + int pos = lcs.size(); + for(int i = len1, j = len2; i > 0 && j > 0; ){ + switch(states[i][j].dir){ + case DIAGON: + lcs[--pos] = one[i - 1]; + --i, --j; + break; + case UPPER: + --i; + break; + case LEFT: + --j; + break; + default: + exit(-1); + } + } + return lcs; +} + + diff --git a/thu_dsa/chp1/lcs/lcs.h b/thu_dsa/chp1/lcs/lcs.h index 5c71655..4a7d011 100644 --- a/thu_dsa/chp1/lcs/lcs.h +++ b/thu_dsa/chp1/lcs/lcs.h @@ -3,6 +3,9 @@ #define MAX(X,Y) ((X)>(Y)?(X):(Y)) using namespace std; -int lcsIt(string one, string two); -int lcsRe(string one, string two); +int lcslenIt(string one, string two, int len1, int len2); +int lcslenRe(string one, string two, int len1, int len2); +string lcsIt(string one, string two, int len1, int len2); +vector lcsRe(string one, string two, int len1, int len2); + diff --git a/thu_dsa/chp1/lcs/test_lcs.cpp b/thu_dsa/chp1/lcs/test_lcs.cpp index 649d086..097fdf5 100644 --- a/thu_dsa/chp1/lcs/test_lcs.cpp +++ b/thu_dsa/chp1/lcs/test_lcs.cpp @@ -2,41 +2,76 @@ #include #include "lcs.h" #include -#define NUMOFLOOP 10 +#define NUMOFLOOP 1 using namespace std; void test_lcsIt(); void test_lcsRe(); +void test_lcslenIt(); +void test_lcslenRe(); int main(){ - test_lcsIt(); + test_lcslenIt(); + test_lcslenRe(); test_lcsRe(); -} - -void test_lcsIt(){ - clock_t begin, end; - int len; - - begin = clock(); - for(int ix = 0; ix != NUMOFLOOP; ++ix) - len = lcsIt(string("educational"), string("advantage")); - end = clock(); - assert(len == 4); - assert(lcsIt(string("didactical"), string("advantage")) == 4); - cout << "Iterative lcs test passed." 
<< endl; - cout << "Running time: " << end - begin << endl; + test_lcsIt(); + system("pause"); } void test_lcsRe(){ clock_t begin, end; - int len; begin = clock(); - for(int ix = 0; ix != NUMOFLOOP; ++ix) - len = lcsRe(string("educational"), string("advantage")); + //vector lcs = lcsRe(string("educational"), string("advantage"), 11, 9); + vector lcs = lcsRe(string("immaculate"), string("computer"), 10, 8); end = clock(); - assert(len == 4); - assert(lcsRe(string("didactical"), string("advantager")) == 4); + assert(!lcs.empty()); + assert(lcs[0].length() == 4); + for (string entry : lcs) + cout << entry << " "; + cout << "\nRecursive lcs test passed." << endl; + cout << "Running time: " << end - begin << endl; +} + +void test_lcsIt() { + clock_t begin, end; + + begin = clock(); + //string lcs = lcsIt(string("educational"), string("advantage"), 11, 9); + string lcs = lcsIt(string("immaculate"), string("computer"), 10, 8); + end = clock(); + assert(!lcs.empty()); + assert(lcs.length() == 4); + cout << lcs << endl; + cout << "Recursive lcs test passed." << endl; + cout << "Running time: " << end - begin << endl; +} + + +void test_lcslenIt(){ + clock_t begin, end; + int len; + + begin = clock(); + for(int ix = 0; ix != NUMOFLOOP; ++ix) + len = lcslenIt(string("educational"), string("advantage"), 11, 9); + end = clock(); + assert(len == 4); + assert(lcslenIt(string("didactical"), string("advantage"), 10, 9) == 4); + cout << "Iterative lcs test passed." << endl; + cout << "Running time: " << end - begin << endl; +} + +void test_lcslenRe(){ + clock_t begin, end; + int len; + + begin = clock(); + for(int ix = 0; ix != NUMOFLOOP; ++ix) + len = lcslenRe(string("educational"), string("advantage"), 11, 9); + end = clock(); + assert(len == 4); + assert(lcslenRe(string("didactical"), string("advantage"), 10, 9) == 4); cout << "Recursive lcs test passed." 
<< endl; cout << "Running time: " << end - begin << endl; } diff --git a/thu_dsa/chp1/master theorem.md b/thu_dsa/chp1/master_theorem.md similarity index 100% rename from thu_dsa/chp1/master theorem.md rename to thu_dsa/chp1/master_theorem.md diff --git a/words.md b/words.md index c8de90e..2c7258b 100644 --- a/words.md +++ b/words.md @@ -1200,7 +1200,7 @@ Some Words + philharmonic > used in the names of musical groups, especially orchestras - - He will condcuct the Berlin Philharmonic in the final concert of the season. + - He will conduct the Berlin Philharmonic in the final concert of the season. - the Vienna Philharmonic Orchestra. + vibrant