From 0ea635e8f5c6c983034ac1e73c010001c1bdf657 Mon Sep 17 00:00:00 2001
From: Shine wOng <1551885@tongji.edu.cn>
Date: Tue, 15 Oct 2019 16:27:31 +0800
Subject: [PATCH] some modifications in thu_dsa/chp1, adding two functions in
 lcs/, updating exercises.md.

---
 thu_dsa/chp1/exercises.md                     | 136 ++++++++++++++++++
 thu_dsa/chp1/lcs/lcs.cpp                      | 103 +++++++++++--
 thu_dsa/chp1/lcs/lcs.h                        |   7 +-
 thu_dsa/chp1/lcs/test_lcs.cpp                 |  77 +++++++---
 .../{master theorem.md => master_theorem.md}  |   0
 words.md                                      |   2 +-
 6 files changed, 293 insertions(+), 32 deletions(-)
 rename thu_dsa/chp1/{master theorem.md => master_theorem.md} (100%)

diff --git a/thu_dsa/chp1/exercises.md b/thu_dsa/chp1/exercises.md
index 5fb9e9e..162d095 100644
--- a/thu_dsa/chp1/exercises.md
+++ b/thu_dsa/chp1/exercises.md
@@ -35,3 +35,139 @@
 (<, 1; 0, R, >);
 (>, #; #, R, h);//halt
 ```
+
+## Introduction.E
+
+> 做递归跟踪分析时，为什么递归调用语句本身可不统计？
+
+因为递归语句本身的执行时间，是计入了对应的子实例当中，对于当前实例而言，只需要考虑函数调用的跳转执行，该执行的执行时间是`O(1)`。
+
+> 试用递归跟踪法，分析`fib()`二分递归版的复杂度。通过递归跟踪，解释该版本复杂度过高的原因
+
+可以针对`fib(n)`画出递归跟踪图，如下：
+
+```
+                fib(n)
+              /        \
+            n-1        n-2
+           /   \       /  \
+         n-2   n-3   n-3   n-4
+        /
+      ....
+```
+
+可以看到，该递归跟踪树的高度为`h = n - 1`，并且其中最高的满二叉树子树高度为`n / 2`，因此二分递归版本的复杂度下界为
+
+$$
+1 + 2 + 4 + \cdots + 2^{\frac{n}{2}} = \Omega(2^{\frac{n}{2}}) = \Omega(\sqrt{2}^n)
+$$
+
+而复杂度上界为
+
+$$
+1 + 2 + 4 + \cdots + 2^{n - 1} = O(2^n)
+$$
+
+从上述递归跟踪图中也可以看到，二分递归版本复杂度过高的原因是其中具有大量重复计算的值。一般地，设`fib(k)`的出现次数为`nfib(k)`，则有每一个`fib(k+1)`和`fib(k+2)`都会产生一个`fib(k)`，因此
+
+```c
+nfib(k) = nfib(k+1) + nfib(k+2), 1 <= k <= n - 2
+```
+
+并且有
+
+```c
+nfib(n) = 1, nfib(n - 1) = 1
+```
+
+因此，
+
+```c
+nfib(k) = fib(n - k + 1), 1 <= k <= n
+```
+
+> 递归算法的空间复杂度，主要取决于什么因素？
+
+递归深度。
+
+> 本节数组求和问题的两个（线性和二分）递归算法时间复杂度相同，空间呢？
+
+每一次递归子问题的空间复杂度都是`O(1)`。
+对于线性递归算法，递归深度为`O(n)`，因此空间复杂度为`O(n)`。
+而二分递归算法，递归深度为`O(logn)`，因此空间复杂度为`O(logn)`。
+
+> 自学递推式的一般求解性方法及规律`Master Theorem`。
+
+看这篇总结[master_theorem](master_theorem.md)
+
+## Introduction.F
+
+> 本节所介绍的迭代式`LCS`算法，似乎需要记录每个子问题的局部解，从而导致空间复杂度激增。实际上，这既不现实，亦无必要。试改进该算法，使得每个子问题只需要常数空间，即可保证最终得到`LCS`的组成（而非仅仅长度）
+
+这里说的`LCS`算法，应该是返回`LCS`的组成的，并没有在邓公的课件、教材、网课上找到，不过可以从返回`LCS`长度的迭代算法中推广得到。
+
+在`LCS`长度的迭代式算法中，需要维护一个`m*n`的向量，来保存各个子问题的`LCS`长度，仿照其思路，在上述`m*n`的向量中，保存各个子问题的`LCS`序列，即可构造出一种`LCS`组成的算法。容易看出，由于每个子问题都需要保存当前的`LCS`，子问题的空间复杂度为`O(min(m, n))`，因此整体的空间复杂度为`O(m*n*min(m, n))`，的确增加了不少，题目就是要求对这种情况进行改进。
+
+可以注意到，上述算法空间复杂度激增的原因，是保存了大量重复的内容。实际上，构成最终`LCS`序列的字符，只在`O(min(m, n))`个位置出现。因此可以仿照图剪枝的策略，在填充向量时动态地记录当前的移动方向，即是从对角线更新，还是从左侧元素更新，还是从上侧元素更新。遍历完成后，再沿着前面标记的方向进行一次反向的遍历，在该过程中记录`LCS`各个字符，从而可以得到整体的`LCS`的组成。该反向遍历至多只会进行`O(m + n)`次，对整体的时间复杂度`O(m*n)`没有显著影响，同时每个子问题的空间复杂度都下降到`O(1)`。该算法的代码如下：
+
+```cpp
+string lcsIt(string one, string two, int len1, int len2){
+	string lcs;
+	if (len1 == 0 || len2 == 0) return lcs;
+
+	vector<vector<State>> states(len1 + 1, vector<State>(len2 + 1));
+	for(int i = 0; i != len1; ++i){
+		for(int j = 0; j != len2; ++j){
+			if(one[i] == two[j]){
+				states[i + 1][j + 1].len = states[i][j].len + 1;
+				states[i + 1][j + 1].dir = DIAGON;
+			}
+			else{
+				if(states[i][j + 1].len < states[i + 1][j].len){
+					states[i + 1][j + 1].len = states[i + 1][j].len;
+					states[i + 1][j + 1].dir = LEFT;
+				}else{
+					states[i + 1][j + 1].len = states[i][j + 1].len;
+					states[i + 1][j + 1].dir = UPPER;
+				}
+			}
+		}
+	}
+
+	lcs.resize(states[len1][len2].len);
+	int pos = lcs.size();
+	for(int i = len1, j = len2; i > 0 && j > 0; ){
+		switch(states[i][j].dir){
+		case DIAGON:
+			lcs[--pos] = one[i - 1];
+			--i, --j;
+			break;
+		case UPPER:
+			--i;
+			break;
+		case LEFT:
+			--j;
+			break;
+		default:
+			exit(-1);
+		}
+	}
+	return lcs;
+}
+```
+
+需要指出的是，对于多个`LCS`的情形，该算法只会返回其中的一个解，因为在算法中对于两个子问题的`LCS`长度相同的情况，是优先选择上面`UPPER`的子问题。
+
+> 考查序列`A = "immaculate"`和`B = "computer"`。1）它们的`LCS`是什么；2）这里的解是否唯一？是否有歧义性？3）按照本节所给的算法，找出的是哪一个解？
+
+1）`"mute"`和`"cute"`
+2）所以显然不唯一，有歧义性。
+3）按照上面给的算法，优先从上方进行更新，即优先选择序列`A`更靠前的字符，即找出的是`"mute"`。
+
+> 实现`LCS`算法的递归版和迭代版，并通过实测比较运行时间。
+
+代码和测试分别放在[lcs.cpp](lcs/lcs.cpp)和[test_lcs.cpp](lcs/test_lcs.cpp)了。递归版的确很慢......
+
+> 采用`memoization`策略，改进`fib()`和`LCS()`的递归版
+
+不想写了......
diff --git a/thu_dsa/chp1/lcs/lcs.cpp b/thu_dsa/chp1/lcs/lcs.cpp
index 826ab95..0d598dc 100644
--- a/thu_dsa/chp1/lcs/lcs.cpp
+++ b/thu_dsa/chp1/lcs/lcs.cpp
@@ -1,22 +1,22 @@
 #include "lcs.h"
+#include <algorithm>
 
-int lcsRe(string one, string two){
+int lcslenRe(string one, string two, int len1, int len2){
 	int len;
-	if(one.empty() || two.empty()) return 0;
+	if(len1 == 0 || len2 == 0) return 0;
 
-	int len1 = one.size(), len2 = two.size();
 	if(one[len1 - 1] == two[len2 - 1]){
-		len = lcsRe(one.substr(0, len1 - 1), two.substr(0, len2 - 1)) + 1;
+		len = lcslenRe(one.substr(0, len1 - 1), two.substr(0, len2 - 1), len1 - 1, len2 - 1) + 1;
 		return len;
 	}
-	len = MAX(lcsRe(one.substr(0, len1 - 1), two), lcsRe(one, two.substr(0, len2 - 1)));
-	return len;
+	int lenone = lcslenRe(one.substr(0, len1 - 1), two, len1 - 1, len2);
+	int lentwo = lcslenRe(one, two.substr(0, len2 - 1), len1, len2 - 1);
+	return lenone > lentwo ? lenone : lentwo;
 }
 
-int lcsIt(string one, string two){
+int lcslenIt(string one, string two, int len1, int len2){ 
 	if(one.empty() || two.empty()) return 0;
 	
-	int len1 = one.size(), len2 = two.size();
 	vector<vector<int>> lens(len1 + 1, vector<int>(len2 + 1, 0));
 	for(int ix = 0; ix != len1; ++ix){
 		for(int jx = 0; jx != len2; ++jx){
@@ -29,3 +29,90 @@ int lcsIt(string one, string two){
 	return lens[len1][len2];
 }
 
+vector<string> lcsRe(string one, string two, int len1, int len2){
+	vector<string> lcs;
+	if (len1 == 0 || len2 == 0) return lcs;
+
+	vector<string> lcs1, lcs2;
+	if(one[len1 - 1] == two[len2 - 1]){
+		lcs = lcsRe(one.substr(0, len1 - 1), two.substr(0, len2 - 1), len1 - 1, len2 - 1);
+		if (lcs.empty()) lcs.push_back(one.substr(len1 - 1, 1));
+		else
+			for (auto It = lcs.begin(); It != lcs.end(); ++It)
+				It->append(1, one[len1 - 1]);
+		return lcs;
+	}
+	lcs1 = lcsRe(one.substr(0, len1 - 1), two, len1 - 1, len2);
+	lcs2 = lcsRe(one, two.substr(0, len2 - 1), len1, len2 - 1);
+	if (lcs1.empty()) lcs = lcs2;
+	else if (lcs2.empty()) lcs = lcs1;
+	else{
+		if (lcs1[0].length() < lcs2[0].length()) lcs = lcs2;
+		else if (lcs1[0].length() > lcs2[0].length()) lcs = lcs1;
+		else {
+			lcs = lcs1;
+			for (string entry : lcs2)
+				if(find(lcs.cbegin(), lcs.cend(), entry) == lcs.cend())
+					lcs.push_back(entry);
+		}
+	}
+	return lcs;
+}
+
+enum direction { LEFT, UPPER, DIAGON, UNINIT };
+
+class State{
+public:
+	int len;
+	direction dir;
+	State(){
+		len = 0;
+		dir = UNINIT;
+	}
+};
+
+string lcsIt(string one, string two, int len1, int len2){
+	string lcs;
+	if (len1 == 0 || len2 == 0) return lcs;
+
+	vector<vector<State>> states(len1 + 1, vector<State>(len2 + 1));
+	for(int i = 0; i != len1; ++i){
+		for(int j = 0; j != len2; ++j){
+			if(one[i] == two[j]){
+				states[i + 1][j + 1].len = states[i][j].len + 1;
+				states[i + 1][j + 1].dir = DIAGON;
+			}
+			else{
+				if(states[i][j + 1].len < states[i + 1][j].len){
+					states[i + 1][j + 1].len = states[i + 1][j].len;
+					states[i + 1][j + 1].dir = LEFT;
+				}else{
+					states[i + 1][j + 1].len = states[i][j + 1].len;
+					states[i + 1][j + 1].dir = UPPER;
+				}
+			}
+		}
+	}
+
+	lcs.resize(states[len1][len2].len);
+	int pos = lcs.size();
+	for(int i = len1, j = len2; i > 0 && j > 0; ){
+		switch(states[i][j].dir){
+		case DIAGON:
+			lcs[--pos] = one[i - 1];
+			--i, --j;
+			break;
+		case UPPER:
+			--i;
+			break;
+		case LEFT:
+			--j;
+			break;
+		default:
+			exit(-1);
+		}
+	}
+	return lcs;
+}
+
+
diff --git a/thu_dsa/chp1/lcs/lcs.h b/thu_dsa/chp1/lcs/lcs.h
index 5c71655..4a7d011 100644
--- a/thu_dsa/chp1/lcs/lcs.h
+++ b/thu_dsa/chp1/lcs/lcs.h
@@ -3,6 +3,9 @@
 #define MAX(X,Y) ((X)>(Y)?(X):(Y))
 using namespace std;
 
-int lcsIt(string one, string two);
-int lcsRe(string one, string two);
+int lcslenIt(string one, string two, int len1, int len2);
+int lcslenRe(string one, string two, int len1, int len2);
+string lcsIt(string one, string two, int len1, int len2);
+vector<string> lcsRe(string one, string two, int len1, int len2);
+
 
diff --git a/thu_dsa/chp1/lcs/test_lcs.cpp b/thu_dsa/chp1/lcs/test_lcs.cpp
index 649d086..097fdf5 100644
--- a/thu_dsa/chp1/lcs/test_lcs.cpp
+++ b/thu_dsa/chp1/lcs/test_lcs.cpp
@@ -2,41 +2,76 @@
 #include <cassert>
 #include "lcs.h"
 #include <time.h>
-#define NUMOFLOOP 10
+#define NUMOFLOOP 1
 using namespace std;
 
 void test_lcsIt();
 void test_lcsRe();
+void test_lcslenIt();
+void test_lcslenRe();
 
 int main(){
-	test_lcsIt();
+	test_lcslenIt();
+	test_lcslenRe();
 	test_lcsRe();
-}
-
-void test_lcsIt(){
-	clock_t begin, end;
-	int len;
-
-	begin = clock();
-	for(int ix = 0; ix != NUMOFLOOP; ++ix)
-		len = lcsIt(string("educational"), string("advantage"));
-	end = clock();
-	assert(len == 4);
-	assert(lcsIt(string("didactical"), string("advantage")) == 4);
-	cout << "Iterative lcs test passed." << endl;
-	cout << "Running time: " << end - begin << endl;
+	test_lcsIt();
+	system("pause");
 }
 
 void test_lcsRe(){
 	clock_t begin, end;
-	int len;
 
 	begin = clock();
-	for(int ix = 0; ix != NUMOFLOOP; ++ix)
-		len = lcsRe(string("educational"), string("advantage"));
+	//vector<string> lcs = lcsRe(string("educational"), string("advantage"), 11, 9);
+	vector<string> lcs = lcsRe(string("immaculate"), string("computer"), 10, 8);
 	end = clock();
-	assert(len == 4);
-	assert(lcsRe(string("didactical"), string("advantager")) == 4);
+	assert(!lcs.empty());
+	assert(lcs[0].length() == 4);
+	for (string entry : lcs)
+		cout << entry << " ";
+	cout << "\nRecursive lcs test passed." << endl;
+	cout << "Running time: " << end - begin << endl;
+}
+
+void test_lcsIt() {
+	clock_t begin, end;
+
+	begin = clock();
+	//string lcs = lcsIt(string("educational"), string("advantage"), 11, 9);
+	string lcs = lcsIt(string("immaculate"), string("computer"), 10, 8);
+	end = clock();
+	assert(!lcs.empty());
+	assert(lcs.length() == 4);
+	cout << lcs << endl;
+	cout << "Recursive lcs test passed." << endl;
+	cout << "Running time: " << end - begin << endl;
+}
+
+
+void test_lcslenIt(){
+	clock_t begin, end;
+	int len;
+
+	begin = clock();
+	for(int ix = 0; ix != NUMOFLOOP; ++ix)
+		len = lcslenIt(string("educational"), string("advantage"), 11, 9);
+	end = clock();
+	assert(len == 4);
+	assert(lcslenIt(string("didactical"), string("advantage"), 10, 9) == 4);
+	cout << "Iterative lcs test passed." << endl;
+	cout << "Running time: " << end - begin << endl;
+}
+
+void test_lcslenRe(){
+	clock_t begin, end;
+	int len;
+
+	begin = clock();
+	for(int ix = 0; ix != NUMOFLOOP; ++ix)
+		len = lcslenRe(string("educational"), string("advantage"), 11, 9);
+	end = clock();
+	assert(len == 4);
+	assert(lcslenRe(string("didactical"), string("advantage"), 10, 9) == 4);
 	cout << "Recursive lcs test passed." << endl;
 	cout << "Running time: " << end - begin << endl;
 }
diff --git a/thu_dsa/chp1/master theorem.md b/thu_dsa/chp1/master_theorem.md
similarity index 100%
rename from thu_dsa/chp1/master theorem.md
rename to thu_dsa/chp1/master_theorem.md
diff --git a/words.md b/words.md
index c8de90e..2c7258b 100644
--- a/words.md
+++ b/words.md
@@ -1200,7 +1200,7 @@ Some Words
 + philharmonic
 > used in the names of musical groups, especially orchestras
 
-	- He will condcuct the Berlin Philharmonic in the final concert of the season.
+	- He will conduct the Berlin Philharmonic in the final concert of the season.
 	- the Vienna Philharmonic Orchestra.
 
 + vibrant