From d923016b3360b3c44db99fa018b15f7d26d840c5 Mon Sep 17 00:00:00 2001 From: jiangzhonglian Date: Fri, 7 Apr 2017 15:48:02 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0=2011.Apriori=20=E8=BE=93?= =?UTF-8?q?=E5=85=A5=E6=96=87=E4=BB=B6=E4=BD=8D=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Apriori_mushroom.dat => input/11.Apriori/mushroom.dat | 0 .../11.Apriori/recent20bills.txt | 0 src/python/11.Apriori/apriori.py | 6 +++--- 3 files changed, 3 insertions(+), 3 deletions(-) rename testData/Apriori_mushroom.dat => input/11.Apriori/mushroom.dat (100%) rename testData/Apriori_recent20bills.txt => input/11.Apriori/recent20bills.txt (100%) diff --git a/testData/Apriori_mushroom.dat b/input/11.Apriori/mushroom.dat similarity index 100% rename from testData/Apriori_mushroom.dat rename to input/11.Apriori/mushroom.dat diff --git a/testData/Apriori_recent20bills.txt b/input/11.Apriori/recent20bills.txt similarity index 100% rename from testData/Apriori_recent20bills.txt rename to input/11.Apriori/recent20bills.txt diff --git a/src/python/11.Apriori/apriori.py b/src/python/11.Apriori/apriori.py index b4daef2b..1f8e648d 100644 --- a/src/python/11.Apriori/apriori.py +++ b/src/python/11.Apriori/apriori.py @@ -239,7 +239,7 @@ def getActionIds(): votesmart.apikey = 'a7fa40adec6f4a77178799fae4441030' actionIdList = [] billTitleList = [] - fr = open('testData/Apriori_recent20bills.txt') + fr = open('input/11.Apriori/recent20bills.txt') for line in fr.readlines(): billNum = int(line.split('\t')[0]) try: @@ -306,7 +306,7 @@ def main(): # 现在的的测试 # 1. 加载数据 dataSet = loadDataSet() - print(dataSet) + print dataSet # 调用 apriori 做购物篮分析 # 支持度满足阈值的key集合L,和所有元素和支持度的全集suppoerData L, supportData = apriori(dataSet, minSupport=0.5) @@ -331,7 +331,7 @@ def main(): # # 项目实战 # # 发现毒蘑菇的相似特性 # # 得到全集的数据 - # dataSet = [line.split() for line in open("testData/Apriori_mushroom.dat").readlines()] + # dataSet = [line.split() for line in open("input/11.Apriori/mushroom.dat").readlines()] # L, supportData = apriori(dataSet, minSupport=0.3) # # 2表示毒蘑菇,1表示可食用的蘑菇 # # 找出关于2的频繁子项出来,就知道如果是毒蘑菇,那么出现频繁的也可能是毒蘑菇