Merge pull request #15 from xinzizi/master

# add 11&12 关键规则的介绍
2026-02-12 23:05:14 +08:00 · 2017-03-03 17:30:16 +08:00
parent b6d455cb23 5d14099924
commit 790a641b8c
5 changed files with 66 additions and 0 deletions
--- a/docs/11.使用Apriori算法进行关联分析.md
+++ b/docs/11.使用Apriori算法进行关联分析.md
@@ -0,0 +1,9 @@
+# 11.使用Apriori算法进行关联分析 #
+
+**- 概念**
+
+1. 关联分析（association analysis）：从大规模数据集中寻找物品间的隐含关系被称作关联分析或者关联规则学习。这些关系有两种形式：频繁项集（经常一块出现的物品集合）和关联规则（暗示两种物品之间可能存在很强的关系）。
+2. 项集的支持度(support)：数据集中包含该项集的记录所占的比例
+3. 置信度(confidence)：置信度({A}->{B}) = 支持度({A,B}) / 支持度({A})
+
+Apriori原理：如果某个项集是频繁的，那么它的所有子集也是频繁的；反之，如果一个项集是非频繁的，那么它的所有超集也是非频繁的。
--- a/docs/12.使用FP-growth算法来高效发现频繁项集.md
+++ b/docs/12.使用FP-growth算法来高效发现频繁项集.md
@@ -0,0 +1,21 @@
+# 12.使用FP-growth算法来高效发现频繁项集 #
+
+
+**- 基本过程**
+
+- 构建FP树
+    * 对原始数据集扫描两遍
+        * 第一遍对所有元素项遍历，并记下出现次数。
+        * 第二遍只扫描频繁元素。
+- 从FP树中挖掘频繁项集
+
+**FP树介绍**
+  
+    是一种紧凑的数据结构，FP代表频繁模式（Frequent Pattern），每个项集以路径的方式存储在树中。
+    包含：项集【集合中的单个元素+出现次数+父节点】
+* 与其他树结构相比
+    * 它通过链接(link)来连接相似元素，被连起来的元素项可以看成一个链表。
+    * 一个元素项可以出现多次
+ 
+
+
--- a/src/python/12.FrequentPattemTree/apriori.py
+++ b/src/python/12.FrequentPattemTree/apriori.py
@@ -0,0 +1,12 @@
+def loadDataSet():
+    return [[1,3,4],[2,3,5],[1,2,3,5],[2,5]]
+def createC1(dataSet):
+    c1=[]
+    for transaction in dataSet:
+        for item in transaction:
+            if not [item] in c1:
+                c1.append([item])
+    c1.sort()
+    return map(frozenset,c1)
+def scanD(D,ck,minSupport):
+    ssCnt = {}
--- a/src/python/12.FrequentPattemTree/fpGrowth.py
+++ b/src/python/12.FrequentPattemTree/fpGrowth.py
@@ -0,0 +1,19 @@
+class treeNode:
+    def __init__(self,nameValue,numOccur,parentNode):
+        self.name = nameValue
+        self.count = numOccur
+        self.nodeLink = None
+        self.parent = parentNode
+        self.children = {}
+    def inc(self,numOccur):
+        self.count += numOccur
+    def disp(self,ind=1):
+        print(' '*ind,self.name,' ',self.count)
+        for child in self.children.values():
+            child.disp(ind+1)
+
+    if __name__ == "__main__":
+        import fpGrowth
+        rootNode = fpGrowth.treeNode('pyramid',9,None)
+        rootNode.children['eye']=fpGrowth.treeNode('eye',13,None)
+        rootNode.disp()
--- a/src/python/12.FrequentPattemTree/test.py
+++ b/src/python/12.FrequentPattemTree/test.py
@@ -0,0 +1,5 @@
+class Test:
+    if __name__ == "__main__":
+        fza=frozenset(['a','bc'])
+        adict={fza:1,'b':2}
+        print(adict)