diff --git a/testData/Apriori_mushroom.dat b/input/11.Apriori/mushroom.dat similarity index 100% rename from testData/Apriori_mushroom.dat rename to input/11.Apriori/mushroom.dat diff --git a/testData/Apriori_recent20bills.txt b/input/11.Apriori/recent20bills.txt similarity index 100% rename from testData/Apriori_recent20bills.txt rename to input/11.Apriori/recent20bills.txt diff --git a/src/python/11.Apriori/apriori.py b/src/python/11.Apriori/apriori.py index b4daef2b..1f8e648d 100644 --- a/src/python/11.Apriori/apriori.py +++ b/src/python/11.Apriori/apriori.py @@ -239,7 +239,7 @@ def getActionIds(): votesmart.apikey = 'a7fa40adec6f4a77178799fae4441030' actionIdList = [] billTitleList = [] - fr = open('testData/Apriori_recent20bills.txt') + fr = open('input/11.Apriori/recent20bills.txt') for line in fr.readlines(): billNum = int(line.split('\t')[0]) try: @@ -306,7 +306,7 @@ def main(): # 现在的的测试 # 1. 加载数据 dataSet = loadDataSet() - print(dataSet) + print dataSet # 调用 apriori 做购物篮分析 # 支持度满足阈值的key集合L,和所有元素和支持度的全集suppoerData L, supportData = apriori(dataSet, minSupport=0.5) @@ -331,7 +331,7 @@ def main(): # # 项目实战 # # 发现毒蘑菇的相似特性 # # 得到全集的数据 - # dataSet = [line.split() for line in open("testData/Apriori_mushroom.dat").readlines()] + # dataSet = [line.split() for line in open("input/11.Apriori/mushroom.dat").readlines()] # L, supportData = apriori(dataSet, minSupport=0.3) # # 2表示毒蘑菇,1表示可食用的蘑菇 # # 找出关于2的频繁子项出来,就知道如果是毒蘑菇,那么出现频繁的也可能是毒蘑菇