From 80ad02982caef21188ad587382bf7e0e12f09232 Mon Sep 17 00:00:00 2001 From: wenhuanhuan Date: Fri, 3 Mar 2017 15:21:38 +0800 Subject: [PATCH] =?UTF-8?q?11=E3=80=81Aoriori?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/11.使用Apriori算法进行关联分析.md | 2 +- src/python/12.FrequentPattemTree/apriori.py | 12 ++++++++++++ src/python/12.FrequentPattemTree/fpGrowth.py | 19 +++++++++++++++++++ src/python/12.FrequentPattemTree/test.py | 5 +++++ 4 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 src/python/12.FrequentPattemTree/apriori.py create mode 100644 src/python/12.FrequentPattemTree/fpGrowth.py create mode 100644 src/python/12.FrequentPattemTree/test.py diff --git a/docs/11.使用Apriori算法进行关联分析.md b/docs/11.使用Apriori算法进行关联分析.md index ad85b87a..71adb177 100644 --- a/docs/11.使用Apriori算法进行关联分析.md +++ b/docs/11.使用Apriori算法进行关联分析.md @@ -6,4 +6,4 @@ 2. 项集的支持度(support):数据集中包含该项集的记录所占的比例 3. 置信度(confidence):置信度({A}->{B}) = 支持度{A,B}/支持度{A} -Apriori原理:如果某个项集是频繁的,那么它的所有子集也是频繁的,反之,一个项集是非频繁的,那么它的所有超集也是非频繁的 \ No newline at end of file +Apriori原理:如果某个项集是频繁的,那么它的所有子集也是频繁的,反之,一个项集是非频繁的,那么它的所有超集也是非频繁的。 \ No newline at end of file diff --git a/src/python/12.FrequentPattemTree/apriori.py b/src/python/12.FrequentPattemTree/apriori.py new file mode 100644 index 00000000..e172b575 --- /dev/null +++ b/src/python/12.FrequentPattemTree/apriori.py @@ -0,0 +1,12 @@ +def loadDataSet(): + return [[1,3,4],[2,3,5],[1,2,3,5],[2,5]] +def createC1(dataSet): + c1=[] + for transaction in dataSet: + for item in transaction: + if not [item] in c1: + c1.append([item]) + c1.sort() + return map(frozenset,c1) +def scanD(D,ck,minSupport): + ssCnt = {} \ No newline at end of file diff --git a/src/python/12.FrequentPattemTree/fpGrowth.py b/src/python/12.FrequentPattemTree/fpGrowth.py new file mode 100644 index 00000000..5e3898cc --- /dev/null +++ b/src/python/12.FrequentPattemTree/fpGrowth.py @@ -0,0 +1,19 @@ +class treeNode: + def __init__(self,nameValue,numOccur,parentNode): + self.name = nameValue + self.count = numOccur + self.nodeLink = None + self.parent = parentNode + self.children = {} + def inc(self,numOccur): + self.count += numOccur + def disp(self,ind=1): + print(' '*ind,self.name,' ',self.count) + for child in self.children.values(): + child.disp(ind+1) + + if __name__ == "__main__": + import fpGrowth + rootNode = fpGrowth.treeNode('pyramid',9,None) + rootNode.children['eye']=fpGrowth.treeNode('eye',13,None) + rootNode.disp() \ No newline at end of file diff --git a/src/python/12.FrequentPattemTree/test.py b/src/python/12.FrequentPattemTree/test.py new file mode 100644 index 00000000..8702723e --- /dev/null +++ b/src/python/12.FrequentPattemTree/test.py @@ -0,0 +1,5 @@ +class Test: + if __name__ == "__main__": + fza=frozenset(['a','bc']) + adict={fza:1,'b':2} + print(adict) \ No newline at end of file