From 888f3b9e3257fa97269ac0b40177f7a2f80f9877 Mon Sep 17 00:00:00 2001 From: wenhuanhuan Date: Thu, 2 Mar 2017 14:37:44 +0800 Subject: [PATCH 1/8] =?UTF-8?q?wenhuanhuan=E8=BF=9B=E5=BA=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/11.使用Apriori算法进行关联分析.md | 9 ++++++++ .../12.使用FP-growth算法来高效发现频繁项集.md | 21 +++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 docs/11.使用Apriori算法进行关联分析.md create mode 100644 docs/12.使用FP-growth算法来高效发现频繁项集.md diff --git a/docs/11.使用Apriori算法进行关联分析.md b/docs/11.使用Apriori算法进行关联分析.md new file mode 100644 index 00000000..ad85b87a --- /dev/null +++ b/docs/11.使用Apriori算法进行关联分析.md @@ -0,0 +1,9 @@ +# 11.使用Apriori算法进行关联分析 # + +**- 概念** + +1. 关联分析(association analysis):从大规模数据集中寻找物品间的隐含关系被称作关联分析或者关联规则学习。关系有两种形式:频繁项集(经常一块出现的物品集合);关联规则(暗示两种物品之间可能存在很强的关系)。 +2. 项集的支持度(support):数据集中包含该项集的记录所占的比例 +3. 置信度(confidence):置信度({A}->{B}) = 支持度{A,B}/支持度{A} + +Apriori原理:如果某个项集是频繁的,那么它的所有子集也是频繁的,反之,一个项集是非频繁的,那么它的所有超集也是非频繁的 \ No newline at end of file diff --git a/docs/12.使用FP-growth算法来高效发现频繁项集.md b/docs/12.使用FP-growth算法来高效发现频繁项集.md new file mode 100644 index 00000000..adcb7cb3 --- /dev/null +++ b/docs/12.使用FP-growth算法来高效发现频繁项集.md @@ -0,0 +1,21 @@ +# 12.使用FP-growth算法来高效发现频繁项集 # + + +**- 基本过程** + +- 构建FP树 + * 对原始数据集扫描两遍 + * 第一遍对所有元素项遍历,并记下出现次数。 + * 第二遍只扫描频繁元素。 +- 从FP树中挖掘频繁项集 + +**FP树介绍** + + 是一种紧凑的数据结构,FP代表频繁模式(Frequent Pattern),每个项集以路径的方式存储在树中。 + 包含:项集【集合中的单个元素+出现次数+父节点】 +* 与其他树结构相比 + * 它通过链接(link)来连接相似元素,被连起来的元素项可以看成一个链表。 + * 一个元素项可以出现多次 + + + From e559c272553a7fd2f41f53c3bb719a4cf100f0a5 Mon Sep 17 00:00:00 2001 From: geekidentity Date: Fri, 3 Mar 2017 09:25:32 +0800 Subject: [PATCH 2/8] =?UTF-8?q?add=20file=205.Logistic=E5=9B=9E=E5=BD=92.m?= =?UTF-8?q?d=20=20and=20change=20readme.md?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 1 + docs/5.Logistic回归.md | 0 2 files changed, 1 insertion(+) create mode 100644 docs/5.Logistic回归.md diff 
--git a/README.md b/README.md index fc123341..6017260e 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ * [决策树](./docs/3.决策树.md) * 4) 基于概率论的分类方法:朴素贝叶斯 * 5) Logistic回归 + * [Logistic回归](./docs/5.Logistic回归.md) * 6) 支持向量机 * 7) 利用AdaBoost元算法提高分类 diff --git a/docs/5.Logistic回归.md b/docs/5.Logistic回归.md new file mode 100644 index 00000000..e69de29b From d71b28e104891d7cd288e14ab1b1203fdc1b7d57 Mon Sep 17 00:00:00 2001 From: yangjifei <1714004716@qq.com> Date: Fri, 3 Mar 2017 13:13:59 +0800 Subject: [PATCH 3/8] add logistic regression --- docs/5.Logistic回归.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/5.Logistic回归.md b/docs/5.Logistic回归.md index e69de29b..e049ec53 100644 --- a/docs/5.Logistic回归.md +++ b/docs/5.Logistic回归.md @@ -0,0 +1,11 @@ + +# 1) 逻辑回归 + + * 逻辑回归(Logistic Regression) + * 1.1 分类问题 + * 1.2 假说表示 + * 1.3 判定边界 + * 1.4 代价函数 + * 1.5 简化的成本函数和梯度下降 + * 1.6 高级优化 + * 1.7 多类别分类:一对多 \ No newline at end of file From 80ad02982caef21188ad587382bf7e0e12f09232 Mon Sep 17 00:00:00 2001 From: wenhuanhuan Date: Fri, 3 Mar 2017 15:21:38 +0800 Subject: [PATCH 4/8] =?UTF-8?q?11=E3=80=81Aoriori?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/11.使用Apriori算法进行关联分析.md | 2 +- src/python/12.FrequentPattemTree/apriori.py | 12 ++++++++++++ src/python/12.FrequentPattemTree/fpGrowth.py | 19 +++++++++++++++++++ src/python/12.FrequentPattemTree/test.py | 5 +++++ 4 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 src/python/12.FrequentPattemTree/apriori.py create mode 100644 src/python/12.FrequentPattemTree/fpGrowth.py create mode 100644 src/python/12.FrequentPattemTree/test.py diff --git a/docs/11.使用Apriori算法进行关联分析.md b/docs/11.使用Apriori算法进行关联分析.md index ad85b87a..71adb177 100644 --- a/docs/11.使用Apriori算法进行关联分析.md +++ b/docs/11.使用Apriori算法进行关联分析.md @@ -6,4 +6,4 @@ 2. 项集的支持度(support):数据集中包含该项集的记录所占的比例 3. 
置信度(confidence):置信度({A}->{B}) = 支持度{A,B}/支持度{A} -Apriori原理:如果某个项集是频繁的,那么它的所有子集也是频繁的,反之,一个项集是非频繁的,那么它的所有超集也是非频繁的 \ No newline at end of file +Apriori原理:如果某个项集是频繁的,那么它的所有子集也是频繁的,反之,一个项集是非频繁的,那么它的所有超集也是非频繁的。 \ No newline at end of file diff --git a/src/python/12.FrequentPattemTree/apriori.py b/src/python/12.FrequentPattemTree/apriori.py new file mode 100644 index 00000000..e172b575 --- /dev/null +++ b/src/python/12.FrequentPattemTree/apriori.py @@ -0,0 +1,12 @@ +def loadDataSet(): + return [[1,3,4],[2,3,5],[1,2,3,5],[2,5]] +def createC1(dataSet): + c1=[] + for transaction in dataSet: + for item in transaction: + if not [item] in c1: + c1.append([item]) + c1.sort() + return map(frozenset,c1) +def scanD(D,ck,minSupport): + ssCnt = {} \ No newline at end of file diff --git a/src/python/12.FrequentPattemTree/fpGrowth.py b/src/python/12.FrequentPattemTree/fpGrowth.py new file mode 100644 index 00000000..5e3898cc --- /dev/null +++ b/src/python/12.FrequentPattemTree/fpGrowth.py @@ -0,0 +1,19 @@ +class treeNode: + def __init__(self,nameValue,numOccur,parentNode): + self.name = nameValue + self.count = numOccur + self.nodeLink = None + self.parent = parentNode + self.children = {} + def inc(self,numOccur): + self.count += numOccur + def disp(self,ind=1): + print(' '*ind,self.name,' ',self.count) + for child in self.children.values(): + child.disp(ind+1) + + if __name__ == "__main__": + import fpGrowth + rootNode = fpGrowth.treeNode('pyramid',9,None) + rootNode.children['eye']=fpGrowth.treeNode('eye',13,None) + rootNode.disp() \ No newline at end of file diff --git a/src/python/12.FrequentPattemTree/test.py b/src/python/12.FrequentPattemTree/test.py new file mode 100644 index 00000000..8702723e --- /dev/null +++ b/src/python/12.FrequentPattemTree/test.py @@ -0,0 +1,5 @@ +class Test: + if __name__ == "__main__": + fza=frozenset(['a','bc']) + adict={fza:1,'b':2} + print(adict) \ No newline at end of file From 2b5f2c894f3016c3acd7a5cd97d3253a89b92aeb Mon Sep 17 00:00:00 2001 From: 
wenhuanhuan Date: Fri, 3 Mar 2017 16:13:23 +0800 Subject: [PATCH 5/8] FIRst commit --- .../.idea/12.FrequentPattemTree.iml | 11 + .../12.FrequentPattemTree/.idea/misc.xml | 4 + .../12.FrequentPattemTree/.idea/modules.xml | 8 + .../12.FrequentPattemTree/.idea/workspace.xml | 405 ++++++++++++++++++ 4 files changed, 428 insertions(+) create mode 100644 src/python/12.FrequentPattemTree/.idea/12.FrequentPattemTree.iml create mode 100644 src/python/12.FrequentPattemTree/.idea/misc.xml create mode 100644 src/python/12.FrequentPattemTree/.idea/modules.xml create mode 100644 src/python/12.FrequentPattemTree/.idea/workspace.xml diff --git a/src/python/12.FrequentPattemTree/.idea/12.FrequentPattemTree.iml b/src/python/12.FrequentPattemTree/.idea/12.FrequentPattemTree.iml new file mode 100644 index 00000000..67116063 --- /dev/null +++ b/src/python/12.FrequentPattemTree/.idea/12.FrequentPattemTree.iml @@ -0,0 +1,11 @@ + + + + + + + + + + \ No newline at end of file diff --git a/src/python/12.FrequentPattemTree/.idea/misc.xml b/src/python/12.FrequentPattemTree/.idea/misc.xml new file mode 100644 index 00000000..c9667eb7 --- /dev/null +++ b/src/python/12.FrequentPattemTree/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/src/python/12.FrequentPattemTree/.idea/modules.xml b/src/python/12.FrequentPattemTree/.idea/modules.xml new file mode 100644 index 00000000..89b88c71 --- /dev/null +++ b/src/python/12.FrequentPattemTree/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/src/python/12.FrequentPattemTree/.idea/workspace.xml b/src/python/12.FrequentPattemTree/.idea/workspace.xml new file mode 100644 index 00000000..8b951b55 --- /dev/null +++ b/src/python/12.FrequentPattemTree/.idea/workspace.xml @@ -0,0 +1,405 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + 1488289229649 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file From 1a1369bf137d2e392acb6717b8cde8e2690de36f Mon Sep 17 00:00:00 2001 From: wenhuanhuan Date: Fri, 3 Mar 2017 16:56:08 +0800 Subject: [PATCH 6/8] delete idea --- .../.idea/12.FrequentPattemTree.iml | 11 - .../12.FrequentPattemTree/.idea/misc.xml | 4 - .../12.FrequentPattemTree/.idea/modules.xml | 8 - .../12.FrequentPattemTree/.idea/workspace.xml | 405 ------------------ 4 files changed, 428 deletions(-) delete mode 100644 src/python/12.FrequentPattemTree/.idea/12.FrequentPattemTree.iml delete mode 100644 src/python/12.FrequentPattemTree/.idea/misc.xml delete mode 100644 src/python/12.FrequentPattemTree/.idea/modules.xml delete mode 100644 src/python/12.FrequentPattemTree/.idea/workspace.xml diff --git a/src/python/12.FrequentPattemTree/.idea/12.FrequentPattemTree.iml b/src/python/12.FrequentPattemTree/.idea/12.FrequentPattemTree.iml deleted file mode 100644 index 67116063..00000000 --- a/src/python/12.FrequentPattemTree/.idea/12.FrequentPattemTree.iml +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - \ No newline at end of file diff --git a/src/python/12.FrequentPattemTree/.idea/misc.xml b/src/python/12.FrequentPattemTree/.idea/misc.xml deleted file mode 100644 index c9667eb7..00000000 --- a/src/python/12.FrequentPattemTree/.idea/misc.xml +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file diff --git a/src/python/12.FrequentPattemTree/.idea/modules.xml b/src/python/12.FrequentPattemTree/.idea/modules.xml deleted file mode 100644 index 89b88c71..00000000 --- a/src/python/12.FrequentPattemTree/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/src/python/12.FrequentPattemTree/.idea/workspace.xml b/src/python/12.FrequentPattemTree/.idea/workspace.xml deleted 
file mode 100644 index 8b951b55..00000000 --- a/src/python/12.FrequentPattemTree/.idea/workspace.xml +++ /dev/null @@ -1,405 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1488289229649 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file From eee18de54d65f2f594946475e53288d740ca1ad5 Mon Sep 17 00:00:00 2001 From: wenhuanhuan Date: Fri, 3 Mar 2017 17:20:03 +0800 Subject: [PATCH 7/8] delete idea2 --- .../12.FrequentPattemTree/.idea/workspace.xml | 405 ++++++++++++++++++ 1 file changed, 405 insertions(+) create mode 100644 src/python/12.FrequentPattemTree/.idea/workspace.xml diff --git a/src/python/12.FrequentPattemTree/.idea/workspace.xml b/src/python/12.FrequentPattemTree/.idea/workspace.xml new file mode 100644 index 00000000..cf2ba026 --- /dev/null +++ b/src/python/12.FrequentPattemTree/.idea/workspace.xml @@ -0,0 +1,405 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1488289229649 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file From 5d14099924c11d6538725d982a4c1da5051a51a7 Mon Sep 17 00:00:00 2001 From: wenhuanhuan Date: Fri, 3 Mar 2017 17:22:53 +0800 Subject: [PATCH 8/8] delete idea1 --- .../12.FrequentPattemTree/.idea/workspace.xml | 405 ------------------ 1 file changed, 405 deletions(-) delete mode 100644 src/python/12.FrequentPattemTree/.idea/workspace.xml diff --git 
a/src/python/12.FrequentPattemTree/.idea/workspace.xml b/src/python/12.FrequentPattemTree/.idea/workspace.xml deleted file mode 100644 index cf2ba026..00000000 --- a/src/python/12.FrequentPattemTree/.idea/workspace.xml +++ /dev/null @@ -1,405 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1488289229649 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file