From 75fbb3866ce9431fa775f0be9f99a8d508a1224d Mon Sep 17 00:00:00 2001 From: jiangzhonglian Date: Thu, 6 Jul 2017 13:15:37 +0800 Subject: [PATCH 01/10] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E6=8E=A8=E8=8D=90?= =?UTF-8?q?=E7=B3=BB=E7=BB=9F=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/python/16.RecommenderSystems/itemcf.py | 3 + .../sklearn-RS-demo-cf-item.py | 85 ++++++++++++++++ .../sklearn-RS-demo-cf.py | 99 ++++++++++++------- src/python/16.RecommenderSystems/usercf.py | 18 +++- 4 files changed, 169 insertions(+), 36 deletions(-) create mode 100644 src/python/16.RecommenderSystems/sklearn-RS-demo-cf-item.py diff --git a/src/python/16.RecommenderSystems/itemcf.py b/src/python/16.RecommenderSystems/itemcf.py index 1ba67b95..eed6ed30 100644 --- a/src/python/16.RecommenderSystems/itemcf.py +++ b/src/python/16.RecommenderSystems/itemcf.py @@ -89,6 +89,7 @@ class ItemBasedCF(): print >> sys.stderr, 'counting movies number and popularity...' 
+ # 统计在所有的用户中,不同电影的总出现次数 for user, movies in self.trainset.iteritems(): for movie in movies: # count item popularity @@ -175,6 +176,8 @@ class ItemBasedCF(): # varables for popularity popular_sum = 0 + # enumerate将其组成一个索引序列,利用它可以同时获得索引和值 + # 参考地址:http://blog.csdn.net/churximi/article/details/51648388 for i, user in enumerate(self.trainset): if i > 0 and i % 500 == 0: print >> sys.stderr, 'recommended for %d users' % i diff --git a/src/python/16.RecommenderSystems/sklearn-RS-demo-cf-item.py b/src/python/16.RecommenderSystems/sklearn-RS-demo-cf-item.py new file mode 100644 index 00000000..b9285f13 --- /dev/null +++ b/src/python/16.RecommenderSystems/sklearn-RS-demo-cf-item.py @@ -0,0 +1,85 @@ +#!/usr/bin/python +# coding:utf8 + +from math import sqrt + +import numpy as np +import pandas as pd +from scipy.sparse.linalg import svds +from sklearn import cross_validation as cv +from sklearn.metrics import mean_squared_error +from sklearn.metrics.pairwise import pairwise_distances + + +def splitData(dataFile, test_size): + # 加载数据集 + header = ['user_id', 'item_id', 'rating', 'timestamp'] + df = pd.read_csv(dataFile, sep='\t', names=header) + + n_users = df.user_id.unique().shape[0] + n_items = df.item_id.unique().shape[0] + + print 'Number of users = ' + str(n_users) + ' | Number of movies = ' + str(n_items) + train_data, test_data = cv.train_test_split(df, test_size=test_size) + return df, n_users, n_items, train_data, test_data + + +def calc_similarity(n_users, n_items, train_data, test_data): + # 创建用户产品矩阵,针对测试数据和训练数据,创建两个矩阵: + train_data_matrix = np.zeros((n_users, n_items)) + for line in train_data.itertuples(): + train_data_matrix[line[1]-1, line[2]-1] = line[3] + test_data_matrix = np.zeros((n_users, n_items)) + for line in test_data.itertuples(): + test_data_matrix[line[1]-1, line[2]-1] = line[3] + + # 使用sklearn的pairwise_distances函数来计算余弦相似性。 + user_similarity = pairwise_distances(train_data_matrix, metric="cosine") + item_similarity = 
pairwise_distances(train_data_matrix.T, metric="cosine") + return train_data_matrix, test_data_matrix, user_similarity, item_similarity + + +def predict(rating, similarity, type='user'): + if type == 'user': + mean_user_rating = rating.mean(axis=1) + rating_diff = (rating - mean_user_rating[:, np.newaxis]) + pred = mean_user_rating[:, np.newaxis] + similarity.dot(rating_diff)/np.array([np.abs(similarity).sum(axis=1)]).T + elif type == 'item': + pred = rating.dot(similarity)/np.array([np.abs(similarity).sum(axis=1)]) + return pred + + +def rmse(prediction, ground_truth): + prediction = prediction[ground_truth.nonzero()].flatten() + ground_truth = ground_truth[ground_truth.nonzero()].flatten() + return sqrt(mean_squared_error(prediction, ground_truth)) + + +if __name__ == "__main__": + # 基于模型的协同过滤 + # ... + # 拆分数据集 + # http://files.grouplens.org/datasets/movielens/ml-100k.zip + dataFile = 'input/16.RecommenderSystems/ml-100k/u.data' + df, n_users, n_items, train_data, test_data = splitData(dataFile, test_size=0.25) + + # 计算相似度 + train_data_matrix, test_data_matrix, user_similarity, item_similarity = calc_similarity(n_users, n_items, train_data, test_data) + + user_prediction = predict(train_data_matrix, user_similarity, type='user') + item_prediction = predict(train_data_matrix, item_similarity, type='item') + + # 评估:均方根误差 + print 'User based CF RMSE: ' + str(rmse(user_prediction, test_data_matrix)) + print 'Item based CF RMSE: ' + str(rmse(item_prediction, test_data_matrix)) + + # 基于模型的协同过滤 + # ... 
+ # 计算MovieLens数据集的稀疏度 + sparsity = round(1.0 - len(df)/float(n_users*n_items), 3) + print 'The sparsity level of MovieLen100K is ' + str(sparsity * 100) + '%' + + u, s, vt = svds(train_data_matrix, k=20) + s_diag_matrix = np.diag(s) + x_pred = np.dot(np.dot(u, s_diag_matrix), vt) + print 'Model based CF RMSE: ' + str(rmse(x_pred, test_data_matrix)) diff --git a/src/python/16.RecommenderSystems/sklearn-RS-demo-cf.py b/src/python/16.RecommenderSystems/sklearn-RS-demo-cf.py index f060d076..5a5bf4c7 100644 --- a/src/python/16.RecommenderSystems/sklearn-RS-demo-cf.py +++ b/src/python/16.RecommenderSystems/sklearn-RS-demo-cf.py @@ -10,59 +10,92 @@ from sklearn import cross_validation as cv from sklearn.metrics import mean_squared_error from sklearn.metrics.pairwise import pairwise_distances -# 加载数据集 -header = ['user_id', 'item_id', 'rating', 'timestamp'] -# http://files.grouplens.org/datasets/movielens/ml-100k.zip -dataFile = 'input/16.RecommenderSystems/ml-100k/u.data' -df = pd.read_csv(dataFile, sep='\t', names=header) -n_users = df.user_id.unique().shape[0] -n_items = df.item_id.unique().shape[0] -print 'Number of users = ' + str(n_users) + ' | Number of movies = ' + str(n_items) +def splitData(dataFile, test_size): + # 加载数据集 + header = ['user_id', 'item_id', 'rating', 'timestamp'] + df = pd.read_csv(dataFile, sep='\t', names=header) -# 拆分数据集 -train_data, test_data = cv.train_test_split(df, test_size=0.25) + n_users = df.user_id.unique().shape[0] + n_items = df.item_id.unique().shape[0] -# 创建用户产品矩阵,针对测试数据和训练数据,创建两个矩阵: -train_data_matrix = np.zeros((n_users, n_items)) -for line in train_data.itertuples(): - train_data_matrix[line[1]-1, line[2]-1] = line[3] -test_data_matrix = np.zeros((n_users, n_items)) -for line in test_data.itertuples(): - test_data_matrix[line[1]-1, line[2]-1] = line[3] -# 使用sklearn的pairwise_distances函数来计算余弦相似性。 -user_similarity = pairwise_distances(train_data_matrix, metric="cosine") -item_similarity = pairwise_distances(train_data_matrix.T, 
metric="cosine") + print 'Number of users = ' + str(n_users) + ' | Number of movies = ' + str(n_items) + train_data, test_data = cv.train_test_split(df, test_size=test_size) + return df, n_users, n_items, train_data, test_data + + +def calc_similarity(n_users, n_items, train_data, test_data): + # 创建用户产品矩阵,针对测试数据和训练数据,创建两个矩阵: + train_data_matrix = np.zeros((n_users, n_items)) + for line in train_data.itertuples(): + train_data_matrix[line[1]-1, line[2]-1] = line[3] + test_data_matrix = np.zeros((n_users, n_items)) + for line in test_data.itertuples(): + test_data_matrix[line[1]-1, line[2]-1] = line[3] + + # 使用sklearn的pairwise_distances函数来计算余弦相似性。 + print "1:", np.shape(train_data_matrix) # 行:人,列:电影 + print "2:", np.shape(train_data_matrix.T) # 行:电影,列:人 + user_similarity = pairwise_distances(train_data_matrix, metric="cosine") + item_similarity = pairwise_distances(train_data_matrix.T, metric="cosine") + return train_data_matrix, test_data_matrix, user_similarity, item_similarity def predict(rating, similarity, type='user'): + print type + print "rating=", np.shape(rating) + print "similarity=", np.shape(similarity) if type == 'user': + # 求出每一个用户,所有电影的综合评分(axis=0 表示对列操作, 1表示对行操作) + # print "rating=", np.shape(rating) mean_user_rating = rating.mean(axis=1) + # np.newaxis参考地址: http://blog.csdn.net/xtingjie/article/details/72510834 + # print "mean_user_rating=", np.shape(mean_user_rating) + # print "mean_user_rating.newaxis=", np.shape(mean_user_rating[:, np.newaxis]) rating_diff = (rating - mean_user_rating[:, np.newaxis]) + # print "rating=", rating[:3, :3] + # print "mean_user_rating[:, np.newaxis]=", mean_user_rating[:, np.newaxis][:3, :3] + # print "rating_diff=", rating_diff[:3, :3] + + # 均分 + 人-人-距离(943, 943)*人-电影-评分diff(943, 1682)=结果-人-电影(每个人对同一电影的综合得分)(943, 1682) 再除以 个人与其他人总的距离 = 人-电影综合得分 pred = mean_user_rating[:, np.newaxis] + similarity.dot(rating_diff)/np.array([np.abs(similarity).sum(axis=1)]).T elif type == 'item': - pred = rating.dot(similarity) / 
np.array([np.abs(similarity).sum(axis=1)]) + # 综合打分: 人-电影-评分(943, 1682)*电影-电影-距离1682, 1682)=结果-人-电影(各个电影对同一电影的综合得分)(943, 1682) / 再除以 电影与其他电影总的距离 = 人-电影综合得分 + pred = rating.dot(similarity)/np.array([np.abs(similarity).sum(axis=1)]) return pred -user_prediction = predict(train_data_matrix, user_similarity, type='user') -item_prediction = predict(train_data_matrix, item_similarity, type='item') - - def rmse(prediction, ground_truth): prediction = prediction[ground_truth.nonzero()].flatten() ground_truth = ground_truth[ground_truth.nonzero()].flatten() return sqrt(mean_squared_error(prediction, ground_truth)) -print 'User based CF RMSE: ' + str(rmse(user_prediction, test_data_matrix)) -print 'Item based CF RMSe: ' + str(rmse(item_prediction, test_data_matrix)) +if __name__ == "__main__": + # 基于模型的协同过滤 + # ... + # 拆分数据集 + # http://files.grouplens.org/datasets/movielens/ml-100k.zip + dataFile = 'input/16.RecommenderSystems/ml-100k/u.data' + df, n_users, n_items, train_data, test_data = splitData(dataFile, test_size=0.25) -sparsity = round(1.0 - len(df)/float(n_users*n_items), 3) -print 'The sparsity level of MovieLen100K is ' + str(sparsity * 100) + '%' + # 计算相似度 + train_data_matrix, test_data_matrix, user_similarity, item_similarity = calc_similarity(n_users, n_items, train_data, test_data) + user_prediction = predict(train_data_matrix, user_similarity, type='user') + item_prediction = predict(train_data_matrix, item_similarity, type='item') -u, s, vt = svds(train_data_matrix, k=20) -s_diag_matrix = np.diag(s) -x_pred = np.dot(np.dot(u, s_diag_matrix), vt) -print 'User-based CF MSE: ' + str(rmse(x_pred, test_data_matrix)) + # 评估:均方根误差 + print 'User based CF RMSE: ' + str(rmse(user_prediction, test_data_matrix)) + print 'Item based CF RMSE: ' + str(rmse(item_prediction, test_data_matrix)) + + # 基于模型的协同过滤 + # ... 
+ # 计算MovieLens数据集的稀疏度 + sparsity = round(1.0 - len(df)/float(n_users*n_items), 3) + print 'The sparsity level of MovieLen100K is ' + str(sparsity * 100) + '%' + + u, s, vt = svds(train_data_matrix, k=20) + s_diag_matrix = np.diag(s) + x_pred = np.dot(np.dot(u, s_diag_matrix), vt) + print 'Model based CF RMSE: ' + str(rmse(x_pred, test_data_matrix)) diff --git a/src/python/16.RecommenderSystems/usercf.py b/src/python/16.RecommenderSystems/usercf.py index fc528292..fbe19640 100644 --- a/src/python/16.RecommenderSystems/usercf.py +++ b/src/python/16.RecommenderSystems/usercf.py @@ -92,6 +92,8 @@ class UserBasedCF(): print >> sys.stderr, 'building movie-users inverse table...' movie2users = dict() + # 同一个电影中,收集用户的集合 + # 统计在所有的用户中,不同电影的总出现次数 for user, movies in self.trainset.iteritems(): for movie in movies: # inverse table for item-users @@ -155,16 +157,24 @@ class UserBasedCF(): watched_movies = self.trainset[user] # 计算top K 用户的相似度 - # v=similar user, wuv=不同用户同时出现的次数 + # v=similar user, wuv=不同用户同时出现的次数,根据wuv倒序从大到小选出K个用户进行排列 # 耗时分析:50.4%的时间在 line-160行 for v, wuv in sorted(self.user_sim_mat[user].items(), key=itemgetter(1), reverse=True)[0:K]: - for movie in self.trainset[v]: + for movie, rating in self.trainset[v].iteritems(): if movie in watched_movies: continue # predict the user's "interest" for each movie rank.setdefault(movie, 0) - rank[movie] += wuv + rank[movie] += wuv * rating # return the N best movies + + """ + wuv + precision=0.3766 recall=0.0759 coverage=0.3183 popularity=6.9194 + + wuv * rating + precision=0.3865 recall=0.0779 coverage=0.2681 popularity=7.0116 + """ return sorted(rank.items(), key=itemgetter(1), reverse=True)[0:N] def evaluate(self): @@ -183,6 +193,8 @@ class UserBasedCF(): # varables for popularity popular_sum = 0 + # enumerate将其组成一个索引序列,利用它可以同时获得索引和值 + # 参考地址:http://blog.csdn.net/churximi/article/details/51648388 for i, user in enumerate(self.trainset): if i > 0 and i % 500 == 0: print >> sys.stderr, 'recommended for %d users' % i From 
28b26de4bb4cf1824fdba83564285b3e3a1110b0 Mon Sep 17 00:00:00 2001 From: jiangzhonglian Date: Sat, 8 Jul 2017 22:49:58 +0800 Subject: [PATCH 02/10] =?UTF-8?q?=E4=BC=98=E5=8C=96=E6=88=90=E5=8A=9F-?= =?UTF-8?q?=E6=8E=A8=E8=8D=90=E7=B3=BB=E7=BB=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../{itemcf.py => RS-itemcf.py} | 12 +- ...arn-RS-demo-cf.py => RS-sklearn-rating.py} | 95 +++++++-- .../{usercf.py => RS-usercf.py} | 6 +- .../python/Recommender.py | 28 +++ .../sklearn-RS-demo-cf-item-test.py | 185 ++++++++++++++++++ .../sklearn-RS-demo-cf-item.py | 85 -------- .../16.RecommenderSystems/test_graph-based.py | 1 - 7 files changed, 309 insertions(+), 103 deletions(-) rename src/python/16.RecommenderSystems/{itemcf.py => RS-itemcf.py} (95%) rename src/python/16.RecommenderSystems/{sklearn-RS-demo-cf.py => RS-sklearn-rating.py} (54%) rename src/python/16.RecommenderSystems/{usercf.py => RS-usercf.py} (97%) create mode 100644 src/python/16.RecommenderSystems/python/Recommender.py create mode 100644 src/python/16.RecommenderSystems/sklearn-RS-demo-cf-item-test.py delete mode 100644 src/python/16.RecommenderSystems/sklearn-RS-demo-cf-item.py diff --git a/src/python/16.RecommenderSystems/itemcf.py b/src/python/16.RecommenderSystems/RS-itemcf.py similarity index 95% rename from src/python/16.RecommenderSystems/itemcf.py rename to src/python/16.RecommenderSystems/RS-itemcf.py index eed6ed30..46d55d38 100644 --- a/src/python/16.RecommenderSystems/itemcf.py +++ b/src/python/16.RecommenderSystems/RS-itemcf.py @@ -65,7 +65,8 @@ class ItemBasedCF(): for line in self.loadfile(filename): # 用户ID,电影名称,评分,时间戳 - user, movie, rating, _ = line.split('::') + # user, movie, rating, _ = line.split('::') + user, movie, rating, _ = line.split('\t') # 通过pivot和随机函数比较,然后初始化用户和对应的值 if (random.random() < pivot): @@ -203,7 +204,8 @@ class ItemBasedCF(): if __name__ == '__main__': - ratingfile = 'input/16.RecommenderSystems/ml-1m/ratings.dat' + # 
ratingfile = 'input/16.RecommenderSystems/ml-1m/ratings.dat' + ratingfile = 'input/16.RecommenderSystems/ml-100k/u.data' # 创建ItemCF对象 itemcf = ItemBasedCF() @@ -212,4 +214,8 @@ if __name__ == '__main__': # 计算用户之间的相似度 itemcf.calc_movie_sim() # 评估推荐效果 - itemcf.evaluate() + # itemcf.evaluate() + # 查看推荐结果用户 + user = "2" + print "推荐结果", itemcf.recommend(user) + print "---", itemcf.testset.get(user, {}) diff --git a/src/python/16.RecommenderSystems/sklearn-RS-demo-cf.py b/src/python/16.RecommenderSystems/RS-sklearn-rating.py similarity index 54% rename from src/python/16.RecommenderSystems/sklearn-RS-demo-cf.py rename to src/python/16.RecommenderSystems/RS-sklearn-rating.py index 5a5bf4c7..f20a6784 100644 --- a/src/python/16.RecommenderSystems/sklearn-RS-demo-cf.py +++ b/src/python/16.RecommenderSystems/RS-sklearn-rating.py @@ -1,7 +1,9 @@ #!/usr/bin/python # coding:utf8 -from math import sqrt +import sys +import math +from operator import itemgetter import numpy as np import pandas as pd @@ -36,9 +38,23 @@ def calc_similarity(n_users, n_items, train_data, test_data): # 使用sklearn的pairwise_distances函数来计算余弦相似性。 print "1:", np.shape(train_data_matrix) # 行:人,列:电影 print "2:", np.shape(train_data_matrix.T) # 行:电影,列:人 + user_similarity = pairwise_distances(train_data_matrix, metric="cosine") item_similarity = pairwise_distances(train_data_matrix.T, metric="cosine") - return train_data_matrix, test_data_matrix, user_similarity, item_similarity + + print >> sys.stderr, '开始统计流行item的数量...' 
+ item_popular = {} + # 统计在所有的用户中,不同电影的总出现次数 + for i_index in range(n_items): + if np.sum(train_data_matrix[:, i_index]) != 0: + item_popular[i_index] = np.sum(train_data_matrix[:, i_index]!=0) + # print "pop=", i_index, self.item_popular[i_index] + + # save the total number of items + item_count = len(item_popular) + print >> sys.stderr, '总共流行item数量 = %d' % item_count + + return train_data_matrix, test_data_matrix, user_similarity, item_similarity, item_popular def predict(rating, similarity, type='user'): @@ -60,7 +76,7 @@ def predict(rating, similarity, type='user'): # 均分 + 人-人-距离(943, 943)*人-电影-评分diff(943, 1682)=结果-人-电影(每个人对同一电影的综合得分)(943, 1682) 再除以 个人与其他人总的距离 = 人-电影综合得分 pred = mean_user_rating[:, np.newaxis] + similarity.dot(rating_diff)/np.array([np.abs(similarity).sum(axis=1)]).T elif type == 'item': - # 综合打分: 人-电影-评分(943, 1682)*电影-电影-距离1682, 1682)=结果-人-电影(各个电影对同一电影的综合得分)(943, 1682) / 再除以 电影与其他电影总的距离 = 人-电影综合得分 + # 综合打分: 人-电影-评分(943, 1682)*电影-电影-距离(1682, 1682)=结果-人-电影(各个电影对同一电影的综合得分)(943, 1682) / 再除以 电影与其他电影总的距离 = 人-电影综合得分 pred = rating.dot(similarity)/np.array([np.abs(similarity).sum(axis=1)]) return pred @@ -68,11 +84,51 @@ def predict(rating, similarity, type='user'): def rmse(prediction, ground_truth): prediction = prediction[ground_truth.nonzero()].flatten() ground_truth = ground_truth[ground_truth.nonzero()].flatten() - return sqrt(mean_squared_error(prediction, ground_truth)) + return math.sqrt(mean_squared_error(prediction, ground_truth)) + + +def evaluate(prediction, item_popular, name): + hit = 0 + rec_count = 0 + test_count = 0 + popular_sum = 0 + all_rec_items = set() + for u_index in range(n_users): + items = np.where(train_data_matrix[u_index, :] == 0)[0] + pre_items = sorted(dict(zip(items, prediction[u_index, items])).items(), key=itemgetter(1), reverse=True)[: 20] + test_items = np.where(test_data_matrix[u_index, :] != 0)[0] + + # 对比测试集和推荐集的差异 + for item, w in pre_items: + if item in test_items: + hit += 1 + all_rec_items.add(item) + + # 
计算用户对应的电影出现次数log值的sum加和 + if item in item_popular: + popular_sum += math.log(1 + item_popular[item]) + + rec_count += len(pre_items) + test_count += len(test_items) + + precision = hit / (1.0 * rec_count) + recall = hit / (1.0 * test_count) + coverage = len(all_rec_items) / (1.0 * len(item_popular)) + popularity = popular_sum / (1.0 * rec_count) + print >> sys.stderr, '%s: precision=%.4f \t recall=%.4f \t coverage=%.4f \t popularity=%.4f' % (name, precision, recall, coverage, popularity) + + +def recommend(u_index, prediction): + items = np.where(train_data_matrix[u_index, :] == 0)[0] + pre_items = sorted(dict(zip(items, prediction[u_index, items])).items(), key=itemgetter(1), reverse=True)[: 10] + test_items = np.where(test_data_matrix[u_index, :] != 0)[0] + + print '原始结果:', test_items + print '推荐结果:', [key for key, value in pre_items] if __name__ == "__main__": - # 基于模型的协同过滤 + # 基于内存的协同过滤 # ... # 拆分数据集 # http://files.grouplens.org/datasets/movielens/ml-100k.zip @@ -80,22 +136,37 @@ if __name__ == "__main__": df, n_users, n_items, train_data, test_data = splitData(dataFile, test_size=0.25) # 计算相似度 - train_data_matrix, test_data_matrix, user_similarity, item_similarity = calc_similarity(n_users, n_items, train_data, test_data) + train_data_matrix, test_data_matrix, user_similarity, item_similarity, item_popular = calc_similarity(n_users, n_items, train_data, test_data) - user_prediction = predict(train_data_matrix, user_similarity, type='user') item_prediction = predict(train_data_matrix, item_similarity, type='item') + user_prediction = predict(train_data_matrix, user_similarity, type='user') # 评估:均方根误差 - print 'User based CF RMSE: ' + str(rmse(user_prediction, test_data_matrix)) print 'Item based CF RMSE: ' + str(rmse(item_prediction, test_data_matrix)) + print 'User based CF RMSE: ' + str(rmse(user_prediction, test_data_matrix)) # 基于模型的协同过滤 # ... 
- # 计算MovieLens数据集的稀疏度 + # 计算MovieLens数据集的稀疏度 (n_users,n_items 是常量,所以,用户行为数据越少,意味着信息量少;越稀疏,优化的空间也越大) sparsity = round(1.0 - len(df)/float(n_users*n_items), 3) print 'The sparsity level of MovieLen100K is ' + str(sparsity * 100) + '%' - u, s, vt = svds(train_data_matrix, k=20) + # 计算稀疏矩阵的最大k个奇异值/向量 + u, s, vt = svds(train_data_matrix, k=15) s_diag_matrix = np.diag(s) - x_pred = np.dot(np.dot(u, s_diag_matrix), vt) - print 'Model based CF RMSE: ' + str(rmse(x_pred, test_data_matrix)) + svd_prediction = np.dot(np.dot(u, s_diag_matrix), vt) + print 'Model based CF RMSE: ' + str(rmse(svd_prediction, test_data_matrix)) + + """ + 在信息量相同的情况下,矩阵越小,那么携带的信息越可靠。 + 所以:user-cf 推荐效果高于 item-cf; 而svd分解后,发现15个维度效果就能达到90%以上,所以信息更可靠,效果也更好。 + item-cf: 1682 + user-cf: 943 + svd: 15 + """ + evaluate(item_prediction, item_popular, 'item') + evaluate(user_prediction, item_popular, 'user') + evaluate(svd_prediction, item_popular, 'svd') + + # 推荐结果 + recommend(1, svd_prediction) diff --git a/src/python/16.RecommenderSystems/usercf.py b/src/python/16.RecommenderSystems/RS-usercf.py similarity index 97% rename from src/python/16.RecommenderSystems/usercf.py rename to src/python/16.RecommenderSystems/RS-usercf.py index fbe19640..ce84bc3d 100644 --- a/src/python/16.RecommenderSystems/usercf.py +++ b/src/python/16.RecommenderSystems/RS-usercf.py @@ -65,7 +65,8 @@ class UserBasedCF(): for line in self.loadfile(filename): # 用户ID,电影名称,评分,时间戳 - user, movie, rating, timestamp = line.split('::') + # user, movie, rating, timestamp = line.split('::') + user, movie, rating, timestamp = line.split('\t') # 通过pivot和随机函数比较,然后初始化用户和对应的值 if (random.random() < pivot): @@ -220,7 +221,8 @@ class UserBasedCF(): if __name__ == '__main__': - ratingfile = 'input/16.RecommenderSystems/ml-1m/ratings.dat' + # ratingfile = 'input/16.RecommenderSystems/ml-1m/ratings.dat' + ratingfile = 'input/16.RecommenderSystems/ml-100k/u.data' # 创建UserCF对象 usercf = UserBasedCF() diff --git 
a/src/python/16.RecommenderSystems/python/Recommender.py b/src/python/16.RecommenderSystems/python/Recommender.py new file mode 100644 index 00000000..40acbb04 --- /dev/null +++ b/src/python/16.RecommenderSystems/python/Recommender.py @@ -0,0 +1,28 @@ +import numpy as np + + +# 自定义杰卡德相似系数函数,仅对0-1矩阵有效 +def Jaccard(a, b): + return 1.0*(a*b).sum()/(a+b-a*b).sum() + + +class Recommender(): + + # 相似度矩阵 + sim = None + + # 计算相似度矩阵的函数 + def similarity(self, x, distance): + y = np.ones((len(x), len(x))) + for i in range(len(x)): + for j in range(len(x)): + y[i, j] = distance(x[i], x[j]) + return y + + # 训练函数 + def fit(self, x, distance=Jaccard): + self.sim = self.similarity(x, distance) + + # 推荐函数 + def recommend(self, a): + return np.dot(self.sim, a)*(1-a) diff --git a/src/python/16.RecommenderSystems/sklearn-RS-demo-cf-item-test.py b/src/python/16.RecommenderSystems/sklearn-RS-demo-cf-item-test.py new file mode 100644 index 00000000..52a7699a --- /dev/null +++ b/src/python/16.RecommenderSystems/sklearn-RS-demo-cf-item-test.py @@ -0,0 +1,185 @@ +#!/usr/bin/python +# coding:utf8 + +''' +Created on 2015-06-22 +Update on 2017-05-16 +@author: Lockvictor/片刻 +《推荐系统实践》协同过滤算法源代码 +参考地址:https://github.com/Lockvictor/MovieLens-RecSys +更新地址:https://github.com/apachecn/MachineLearning +''' +import math +import random +import sys +from operator import itemgetter + +import numpy as np +import pandas as pd +from sklearn import cross_validation as cv +from sklearn.metrics.pairwise import pairwise_distances + +print(__doc__) +# 作用:使得随机数据可预测 +random.seed(0) + + +class ItemBasedCF(): + ''' TopN recommendation - ItemBasedCF ''' + def __init__(self): + # 拆分数据集 + self.train_mat = {} + self.test_mat = {} + + # 总用户数 + self.n_users = 0 + self.n_items = 0 + + # n_sim_user: top 20个用户, n_rec_item: top 10个推荐结果 + self.n_sim_item = 20 + self.n_rec_item = 10 + + # item_mat_similarity: 电影之间的相似度, item_popular: 电影的出现次数, item_count: 总电影数量 + self.item_mat_similarity = {} + self.item_popular = {} + 
self.item_count = 0 + + print >> sys.stderr, 'Similar item number = %d' % self.n_sim_item + print >> sys.stderr, 'Recommended item number = %d' % self.n_rec_item + + def splitData(self, dataFile, test_size): + # 加载数据集 + header = ['user_id', 'item_id', 'rating', 'timestamp'] + df = pd.read_csv(dataFile, sep='\t', names=header) + + self.n_users = df.user_id.unique().shape[0] + self.n_items = df.item_id.unique().shape[0] + + print 'Number of users = ' + str(self.n_users) + ' | Number of items = ' + str(self.n_items) + + # 拆分数据集: 用户+电影 + self.train_data, self.test_data = cv.train_test_split(df, test_size=test_size) + print >> sys.stderr, '分离训练集和测试集成功' + print >> sys.stderr, 'len(train) = %s' % np.shape(self.train_data)[0] + print >> sys.stderr, 'len(test) = %s' % np.shape(self.test_data)[0] + + def calc_similarity(self): + # 创建用户产品矩阵,针对测试数据和训练数据,创建两个矩阵: + self.train_mat = np.zeros((self.n_users, self.n_items)) + for line in self.train_data.itertuples(): + self.train_mat[int(line.user_id)-1, int(line.item_id)-1] = float(line.rating) + self.test_mat = np.zeros((self.n_users, self.n_items)) + for line in self.test_data.itertuples(): + # print "line", line.user_id-1, line.item_id-1, line.rating + self.test_mat[int(line.user_id)-1, int(line.item_id)-1] = float(line.rating) + + # 使用sklearn的pairwise_distances函数来计算余弦相似性。 + print "1:", np.shape(np.mat(self.train_mat).T) # 行:电影,列:人 + # 电影-电影-距离(1682, 1682) + self.item_mat_similarity = pairwise_distances(np.mat(self.train_mat).T, metric='cosine') + print >> sys.stderr, 'item_mat_similarity=', np.shape(self.item_mat_similarity) + + print >> sys.stderr, '开始统计流行item的数量...' 
+ + # 统计在所有的用户中,不同电影的总出现次数 + for i_index in range(self.n_items): + if np.sum(self.train_mat[:, i_index]) != 0: + self.item_popular[i_index] = np.sum(self.train_mat[:, i_index]!=0) + # print "pop=", i_index, self.item_popular[i_index] + + # save the total number of items + self.item_count = len(self.item_popular) + print >> sys.stderr, '总共流行item数量 = %d' % self.item_count + + # @profile + def recommend(self, u_index): + """recommend(找出top K的电影,对电影进行相似度sum的排序,取出top N的电影数) + + Args: + u_index 用户_ID-1=用户index + Returns: + rec_item 电影推荐列表,按照相似度从大到小的排序 + """ + ''' Find K similar items and recommend N items. ''' + K = self.n_sim_item + N = self.n_rec_item + rank = {} + i_items = np.where(self.train_mat[u_index, :] != 0)[0] + # print "i_items=", i_items + watched_items = dict(zip(i_items, self.train_mat[u_index, i_items])) + + # 计算top K 电影的相似度 + # rating=电影评分, w=不同电影出现的次数 + # 耗时分析:98.2%的时间在 line-154行 + for i_item, rating in watched_items.iteritems(): + i_other_items = np.where(self.item_mat_similarity[i_item, :] != 0)[0] + for related_item, w in sorted(dict(zip(i_other_items, self.item_mat_similarity[i_item, i_other_items])).items(), key=itemgetter(1), reverse=True)[0:K]: + if related_item in watched_items: + continue + rank.setdefault(related_item, 0) + rank[related_item] += w * rating + + # return the N best items + return sorted(rank.items(), key=itemgetter(1), reverse=True)[0:N] + + def evaluate(self): + ''' return precision, recall, coverage and popularity ''' + print >> sys.stderr, 'Evaluation start...' 
+ + # varables for precision and recall + # hit表示命中(测试集和推荐集相同+1),rec_count 每个用户的推荐数, test_count 每个用户对应的测试数据集的电影数 + hit = 0 + rec_count = 0 + test_count = 0 + # varables for coverage + all_rec_items = set() + # varables for popularity + popular_sum = 0 + + # enumerate 将其组成一个索引序列,利用它可以同时获得索引和值 + # 参考地址:http://blog.csdn.net/churximi/article/details/51648388 + for u_index in range(50): + if u_index > 0 and u_index % 10 == 0: + print >> sys.stderr, 'recommended for %d users' % u_index + print "u_index", u_index + + # 对比测试集和推荐集的差异 + rec_items = self.recommend(u_index) + print "rec_items=", rec_items + for item, w in rec_items: + # print 'test_mat[u_index, item]=', item, self.test_mat[u_index, item] + + if self.test_mat[u_index, item] != 0: + hit += 1 + print "self.test_mat[%d, %d]=%s" % (u_index, item, self.test_mat[u_index, item]) + # 计算用户对应的电影出现次数log值的sum加和 + if item in self.item_popular: + popular_sum += math.log(1 + self.item_popular[item]) + + rec_count += len(rec_items) + test_count += np.sum(self.test_mat[u_index, :] != 0) + # print "test_count=", np.sum(self.test_mat[u_index, :] != 0), np.sum(self.train_mat[u_index, :] != 0) + + print("-------", hit, rec_count) + precision = hit / (1.0 * rec_count) + recall = hit / (1.0 * test_count) + coverage = len(all_rec_items) / (1.0 * self.item_count) + popularity = popular_sum / (1.0 * rec_count) + + print >> sys.stderr, 'precision=%.4f \t recall=%.4f \t coverage=%.4f \t popularity=%.4f' % (precision, recall, coverage, popularity) + + +if __name__ == '__main__': + dataFile = 'input/16.RecommenderSystems/ml-100k/u.data' + + # 创建ItemCF对象 + itemcf = ItemBasedCF() + # 将数据按照 7:3的比例,拆分成:训练集和测试集,存储在usercf的trainset和testset中 + itemcf.splitData(dataFile, test_size=0.3) + # 计算用户之间的相似度 + itemcf.calc_similarity() + # 评估推荐效果 + # itemcf.evaluate() + # 查看推荐结果用户 + print "推荐结果", itemcf.recommend(u_index=1) + print "---", np.where(itemcf.test_mat[1, :] != 0)[0] diff --git a/src/python/16.RecommenderSystems/sklearn-RS-demo-cf-item.py 
b/src/python/16.RecommenderSystems/sklearn-RS-demo-cf-item.py deleted file mode 100644 index b9285f13..00000000 --- a/src/python/16.RecommenderSystems/sklearn-RS-demo-cf-item.py +++ /dev/null @@ -1,85 +0,0 @@ -#!/usr/bin/python -# coding:utf8 - -from math import sqrt - -import numpy as np -import pandas as pd -from scipy.sparse.linalg import svds -from sklearn import cross_validation as cv -from sklearn.metrics import mean_squared_error -from sklearn.metrics.pairwise import pairwise_distances - - -def splitData(dataFile, test_size): - # 加载数据集 - header = ['user_id', 'item_id', 'rating', 'timestamp'] - df = pd.read_csv(dataFile, sep='\t', names=header) - - n_users = df.user_id.unique().shape[0] - n_items = df.item_id.unique().shape[0] - - print 'Number of users = ' + str(n_users) + ' | Number of movies = ' + str(n_items) - train_data, test_data = cv.train_test_split(df, test_size=test_size) - return df, n_users, n_items, train_data, test_data - - -def calc_similarity(n_users, n_items, train_data, test_data): - # 创建用户产品矩阵,针对测试数据和训练数据,创建两个矩阵: - train_data_matrix = np.zeros((n_users, n_items)) - for line in train_data.itertuples(): - train_data_matrix[line[1]-1, line[2]-1] = line[3] - test_data_matrix = np.zeros((n_users, n_items)) - for line in test_data.itertuples(): - test_data_matrix[line[1]-1, line[2]-1] = line[3] - - # 使用sklearn的pairwise_distances函数来计算余弦相似性。 - user_similarity = pairwise_distances(train_data_matrix, metric="cosine") - item_similarity = pairwise_distances(train_data_matrix.T, metric="cosine") - return train_data_matrix, test_data_matrix, user_similarity, item_similarity - - -def predict(rating, similarity, type='user'): - if type == 'user': - mean_user_rating = rating.mean(axis=1) - rating_diff = (rating - mean_user_rating[:, np.newaxis]) - pred = mean_user_rating[:, np.newaxis] + similarity.dot(rating_diff)/np.array([np.abs(similarity).sum(axis=1)]).T - elif type == 'item': - pred = rating.dot(similarity)/np.array([np.abs(similarity).sum(axis=1)]) 
- return pred - - -def rmse(prediction, ground_truth): - prediction = prediction[ground_truth.nonzero()].flatten() - ground_truth = ground_truth[ground_truth.nonzero()].flatten() - return sqrt(mean_squared_error(prediction, ground_truth)) - - -if __name__ == "__main__": - # 基于模型的协同过滤 - # ... - # 拆分数据集 - # http://files.grouplens.org/datasets/movielens/ml-100k.zip - dataFile = 'input/16.RecommenderSystems/ml-100k/u.data' - df, n_users, n_items, train_data, test_data = splitData(dataFile, test_size=0.25) - - # 计算相似度 - train_data_matrix, test_data_matrix, user_similarity, item_similarity = calc_similarity(n_users, n_items, train_data, test_data) - - user_prediction = predict(train_data_matrix, user_similarity, type='user') - item_prediction = predict(train_data_matrix, item_similarity, type='item') - - # 评估:均方根误差 - print 'User based CF RMSE: ' + str(rmse(user_prediction, test_data_matrix)) - print 'Item based CF RMSE: ' + str(rmse(item_prediction, test_data_matrix)) - - # 基于模型的协同过滤 - # ... - # 计算MovieLens数据集的稀疏度 - sparsity = round(1.0 - len(df)/float(n_users*n_items), 3) - print 'The sparsity level of MovieLen100K is ' + str(sparsity * 100) + '%' - - u, s, vt = svds(train_data_matrix, k=20) - s_diag_matrix = np.diag(s) - x_pred = np.dot(np.dot(u, s_diag_matrix), vt) - print 'Model based CF RMSE: ' + str(rmse(x_pred, test_data_matrix)) diff --git a/src/python/16.RecommenderSystems/test_graph-based.py b/src/python/16.RecommenderSystems/test_graph-based.py index 2dca9e48..12d22403 100644 --- a/src/python/16.RecommenderSystems/test_graph-based.py +++ b/src/python/16.RecommenderSystems/test_graph-based.py @@ -14,4 +14,3 @@ def PersonalRank(G, alpha, root): tmp[j] += 1 - alpha rank = tmp return rank - From 1976527c96b627e2cf7debc0fbf8d73ce59dd2af Mon Sep 17 00:00:00 2001 From: jiangzhonglian Date: Sun, 9 Jul 2017 01:20:59 +0800 Subject: [PATCH 03/10] =?UTF-8?q?=E6=8E=A8=E8=8D=90=E7=B3=BB=E7=BB=9F?= =?UTF-8?q?=E6=9C=80=E7=BB=88=E4=BC=98=E5=8C=96=E7=BB=93=E6=9E=9C-?= 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/16.推荐系统.md | 14 ++++++++++++++ .../16.RecommenderSystems/RS-sklearn-rating.py | 2 ++ 2 files changed, 16 insertions(+) diff --git a/docs/16.推荐系统.md b/docs/16.推荐系统.md index 7ec39255..7e5df78d 100644 --- a/docs/16.推荐系统.md +++ b/docs/16.推荐系统.md @@ -17,6 +17,19 @@ ![基于知识的推荐](/images/16.RecommendedSystem/基于知识的推荐.jpg) +## 协同过滤推荐 + +* memory-based推荐 + * Item-based方法 + * User-based方法 + * Memory-based推荐方法通过执行最近邻搜索,把每一个Item或者User看成一个向量,计算其他所有Item或者User与它的相似度。有了Item或者User之间的两两相似度之后,就可以进行预测与推荐了。 +* model-based推荐 + * Model-based推荐最常见的方法为Matrix factorization. + * 矩阵分解通过把原始的评分矩阵R分解为两个矩阵相乘,并且只考虑有评分的值,训练时不考虑missing项的值。R矩阵分解成为U与V两个矩阵后,评分矩阵R中missing的值就可以通过U矩阵中的某列和V矩阵的某行相乘得到 + * 矩阵分解的目标函数: U矩阵与V矩阵的可以通过梯度下降(gradient descent)算法求得,通过交替更新u与v多次迭代收敛之后可求出U与V。 + * 矩阵分解背后的核心思想,找到两个矩阵,它们相乘之后得到的那个矩阵的值,与评分矩阵R中有值的位置中的值尽可能接近。这样一来,分解出来的两个矩阵相乘就尽可能还原了评分矩阵R,因为有值的地方,值都相差得尽可能地小,那么missing的值通过这样的方式计算得到,比较符合趋势。 +* 协同过滤中主要存在如下两个问题:稀疏性与冷启动问题。已有的方案通常会通过引入多个不同的数据源或者辅助信息(Side information)来解决这些问题,用户的Side information可以是用户的基本个人信息、用户画像信息等,而Item的Side information可以是物品的content信息等。 + * * * * **作者:[片刻](http://www.apache.wiki/display/~jiangzhonglian)** @@ -27,3 +40,4 @@ * [推荐系统中常用算法 以及优点缺点对比](http://www.36dsj.com/archives/9519) * [推荐算法的基于知识推荐](https://zhidao.baidu.com/question/2013524494179442228.html) +* [推荐系统中基于深度学习的混合协同过滤模型](http://www.iteye.com/news/32100) diff --git a/src/python/16.RecommenderSystems/RS-sklearn-rating.py b/src/python/16.RecommenderSystems/RS-sklearn-rating.py index f20a6784..f00200fc 100644 --- a/src/python/16.RecommenderSystems/RS-sklearn-rating.py +++ b/src/python/16.RecommenderSystems/RS-sklearn-rating.py @@ -128,6 +128,7 @@ def recommend(u_index, prediction): if __name__ == "__main__": + # 基于内存的协同过滤 # ... 
# 拆分数据集 @@ -155,6 +156,7 @@ if __name__ == "__main__": u, s, vt = svds(train_data_matrix, k=15) s_diag_matrix = np.diag(s) svd_prediction = np.dot(np.dot(u, s_diag_matrix), vt) + print "svd-shape:", np.shape(svd_prediction) print 'Model based CF RMSE: ' + str(rmse(svd_prediction, test_data_matrix)) """ From 6ddc5d0f412e5ab7dc81786421d11f399203d011 Mon Sep 17 00:00:00 2001 From: jiangzhonglian Date: Mon, 10 Jul 2017 12:37:57 +0800 Subject: [PATCH 04/10] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E6=8E=A8=E8=8D=90?= =?UTF-8?q?=E7=B3=BB=E7=BB=9F=E5=86=85=E5=AE=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/python/16.RecommenderSystems/RS-sklearn-rating.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/python/16.RecommenderSystems/RS-sklearn-rating.py b/src/python/16.RecommenderSystems/RS-sklearn-rating.py index f00200fc..768f06c7 100644 --- a/src/python/16.RecommenderSystems/RS-sklearn-rating.py +++ b/src/python/16.RecommenderSystems/RS-sklearn-rating.py @@ -23,6 +23,7 @@ def splitData(dataFile, test_size): print 'Number of users = ' + str(n_users) + ' | Number of movies = ' + str(n_items) train_data, test_data = cv.train_test_split(df, test_size=test_size) + print "数据量:", len(train_data), len(test_data) return df, n_users, n_items, train_data, test_data From be26414de2e2e48c27f5159e688433dcbc344237 Mon Sep 17 00:00:00 2001 From: jiangzhonglian Date: Mon, 10 Jul 2017 12:55:02 +0800 Subject: [PATCH 05/10] =?UTF-8?q?=E6=9B=B4=E6=96=B0svm=E7=94=BB=E5=9B=BE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/python/6.SVM/sklearn-svm-demo.py | 5 +- .../7.AdaBoost/sklearn-adaboost-demo.py | 91 +++++++++++++++++++ 2 files changed, 94 insertions(+), 2 deletions(-) create mode 100644 src/python/7.AdaBoost/sklearn-adaboost-demo.py diff --git a/src/python/6.SVM/sklearn-svm-demo.py b/src/python/6.SVM/sklearn-svm-demo.py index 88e7aa28..23f42a58 100644 --- 
a/src/python/6.SVM/sklearn-svm-demo.py +++ b/src/python/6.SVM/sklearn-svm-demo.py @@ -7,6 +7,7 @@ Updated on 2017-06-28 SVM:最大边距分离超平面 @author: 片刻 《机器学习实战》更新地址:https://github.com/apachecn/MachineLearning +sklearn-SVM译文链接: http://cwiki.apachecn.org/pages/viewpage.action?pageId=10031359 """ import numpy as np import matplotlib.pyplot as plt @@ -52,7 +53,7 @@ clf.fit(X, Y) # 获取分割超平面 w = clf.coef_[0] # 斜率 -a = -w[0] / w[1] +a = -w[0]/w[1] # 从-5到5,顺序间隔采样50个样本,默认是num=50 # xx = np.linspace(-5, 5) # , num=50) xx = np.linspace(-2, 10) # , num=50) @@ -74,7 +75,7 @@ plt.plot(xx, yy_down, 'k--') plt.plot(xx, yy_up, 'k--') plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=80, facecolors='none') -plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired) +plt.scatter([X[:, 0]], [X[:, 1]], c=Y, cmap=plt.cm.Paired) plt.axis('tight') plt.show() diff --git a/src/python/7.AdaBoost/sklearn-adaboost-demo.py b/src/python/7.AdaBoost/sklearn-adaboost-demo.py new file mode 100644 index 00000000..6d3b1ac2 --- /dev/null +++ b/src/python/7.AdaBoost/sklearn-adaboost-demo.py @@ -0,0 +1,91 @@ +#!/usr/bin/python +# coding:utf8 + +""" +Created on 2017-07-10 +Updated on 2017-07-10 +@author: 片刻 +《机器学习实战》更新地址:https://github.com/apachecn/MachineLearning +sklearn-AdaBoost译文链接: http://cwiki.apachecn.org/pages/viewpage.action?pageId=10813457 +""" +print(__doc__) + +# Author: Noel Dawe +# +# License: BSD 3 clause + +import matplotlib.pyplot as plt +import numpy as np +from sklearn.datasets import make_gaussian_quantiles +from sklearn.ensemble import AdaBoostClassifier +from sklearn.tree import DecisionTreeClassifier + +# Construct dataset +X1, y1 = make_gaussian_quantiles(cov=2., + n_samples=200, n_features=2, + n_classes=2, random_state=1) +X2, y2 = make_gaussian_quantiles(mean=(3, 3), cov=1.5, + n_samples=300, n_features=2, + n_classes=2, random_state=1) +X = np.concatenate((X1, X2)) +y = np.concatenate((y1, - y2 + 1)) + +# Create and fit an AdaBoosted decision tree +bdt = 
AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), + algorithm="SAMME", + n_estimators=200) + +bdt.fit(X, y) + +plot_colors = "br" +plot_step = 0.02 +class_names = "AB" + +plt.figure(figsize=(10, 5)) + +# Plot the decision boundaries +plt.subplot(121) +x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1 +y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1 +xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step), + np.arange(y_min, y_max, plot_step)) + +Z = bdt.predict(np.c_[xx.ravel(), yy.ravel()]) +Z = Z.reshape(xx.shape) +cs = plt.contourf(xx, yy, Z, cmap=plt.cm.Paired) +plt.axis("tight") + +# Plot the training points +for i, n, c in zip(range(2), class_names, plot_colors): + idx = np.where(y == i) + plt.scatter(X[idx, 0], X[idx, 1], + c=c, cmap=plt.cm.Paired, + label="Class %s" % n) +plt.xlim(x_min, x_max) +plt.ylim(y_min, y_max) +plt.legend(loc='upper right') +plt.xlabel('x') +plt.ylabel('y') +plt.title('Decision Boundary') + +# Plot the two-class decision scores +twoclass_output = bdt.decision_function(X) +plot_range = (twoclass_output.min(), twoclass_output.max()) +plt.subplot(122) +for i, n, c in zip(range(2), class_names, plot_colors): + plt.hist(twoclass_output[y == i], + bins=10, + range=plot_range, + facecolor=c, + label='Class %s' % n, + alpha=.5) +x1, x2, y1, y2 = plt.axis() +plt.axis((x1, x2, y1, y2 * 1.2)) +plt.legend(loc='upper right') +plt.ylabel('Samples') +plt.xlabel('Score') +plt.title('Decision Scores') + +plt.tight_layout() +plt.subplots_adjust(wspace=0.35) +plt.show() From ea3231b6b47c2a512ec48af9e59acb8e8641b716 Mon Sep 17 00:00:00 2001 From: jiangzhonglian Date: Mon, 10 Jul 2017 12:56:20 +0800 Subject: [PATCH 06/10] =?UTF-8?q?=E6=9B=B4=E6=96=B0import=E7=9A=84?= =?UTF-8?q?=E9=A1=BA=E5=BA=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/python/6.SVM/sklearn-svm-demo.py | 3 ++- src/python/7.AdaBoost/sklearn-adaboost-demo.py | 10 +++------- 2 files changed, 5 insertions(+), 8 
deletions(-) diff --git a/src/python/6.SVM/sklearn-svm-demo.py b/src/python/6.SVM/sklearn-svm-demo.py index 23f42a58..b8d6841f 100644 --- a/src/python/6.SVM/sklearn-svm-demo.py +++ b/src/python/6.SVM/sklearn-svm-demo.py @@ -9,9 +9,10 @@ SVM:最大边距分离超平面 《机器学习实战》更新地址:https://github.com/apachecn/MachineLearning sklearn-SVM译文链接: http://cwiki.apachecn.org/pages/viewpage.action?pageId=10031359 """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np from sklearn import svm + print(__doc__) diff --git a/src/python/7.AdaBoost/sklearn-adaboost-demo.py b/src/python/7.AdaBoost/sklearn-adaboost-demo.py index 6d3b1ac2..f313aefe 100644 --- a/src/python/7.AdaBoost/sklearn-adaboost-demo.py +++ b/src/python/7.AdaBoost/sklearn-adaboost-demo.py @@ -4,22 +4,18 @@ """ Created on 2017-07-10 Updated on 2017-07-10 -@author: 片刻 +@author: 片刻/Noel Dawe 《机器学习实战》更新地址:https://github.com/apachecn/MachineLearning sklearn-AdaBoost译文链接: http://cwiki.apachecn.org/pages/viewpage.action?pageId=10813457 """ -print(__doc__) - -# Author: Noel Dawe -# -# License: BSD 3 clause - import matplotlib.pyplot as plt import numpy as np from sklearn.datasets import make_gaussian_quantiles from sklearn.ensemble import AdaBoostClassifier from sklearn.tree import DecisionTreeClassifier +print(__doc__) + # Construct dataset X1, y1 = make_gaussian_quantiles(cov=2., n_samples=200, n_features=2, From c701ebebbdfacfa49d77af7c4db771b1e8f321cc Mon Sep 17 00:00:00 2001 From: jiangzhonglian Date: Mon, 10 Jul 2017 16:42:52 +0800 Subject: [PATCH 07/10] =?UTF-8?q?=E6=9B=B4=E6=96=B0AdaBoost=E7=AE=97?= =?UTF-8?q?=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/7.1.利用AdaBoost元算法提高分类.md | 6 +- .../7.AdaBoost/adaboost_code-flow-chart.jpg | Bin 0 -> 86839 bytes src/python/7.AdaBoost/adaboost.py | 102 +++++++++--------- .../7.AdaBoost/sklearn-adaboost-demo.py | 2 +- 4 files changed, 58 insertions(+), 52 deletions(-) create mode 100644 
images/7.AdaBoost/adaboost_code-flow-chart.jpg diff --git a/docs/7.1.利用AdaBoost元算法提高分类.md b/docs/7.1.利用AdaBoost元算法提高分类.md index 75a3a2ff..75f705c9 100644 --- a/docs/7.1.利用AdaBoost元算法提高分类.md +++ b/docs/7.1.利用AdaBoost元算法提高分类.md @@ -11,7 +11,7 @@ 机器学习处理问题时又何尝不是如此? 这就是元算法(meta-algorithm)背后的思想。 * 集成方法: 1. 投票选举 2. 再学习 -> bagging:基于数据随机重抽样的分类起构造方法 +> bagging:基于数据随机重抽样的分类器构造方法 * 自举汇聚法(bootstrap aggregating),也称为bagging方法,是在从原始数据集选择S次后得到S个新数据集的一种技术。 1. 新数据集和原数据集的大小相等。 @@ -71,7 +71,7 @@ ``` 发现: alpha目的主要是计算每一个分类器实例的权重(组合就是分类结果) - 分类的权重值:最大的值,为alpha的加和,最小值为-最大值 + 分类的权重值:最大的值=alpha的加和,最小值=-最大值 D的目的是为了计算错误概率: weightedError = D.T*errArr,求最佳分类器 特征的权重值:如果一个值误判的几率越小,那么D的特征权重越少 ``` @@ -92,6 +92,8 @@ D的目的是为了计算错误概率: weightedError = D.T*errArr,求最佳 如果错误率等于0.0,则退出循环 ``` +![AdaBoost代码流程图](/images/7.AdaBoost/adaboost_code-flow-chart.jpg "AdaBoost代码流程图") + ## 处理非均衡分类问题 > 概念 diff --git a/images/7.AdaBoost/adaboost_code-flow-chart.jpg b/images/7.AdaBoost/adaboost_code-flow-chart.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f1e288bed0ac3b62d2a89e3ba1795b0f532296dd GIT binary patch literal 86839 zcmeEu2Urxzwto{O2#90>2@-@QDp84|k_7}LCzTAsh~zkk0+Iy;1O$|fgpr&XB?<^g z&N&GP%n%0{X8w)4d-pDT-;M8m-~YXDTN|p2>aIGces#{NQ>O|$f}H`*t0*Wd05~`R zpalK{u(QAoc@JAF08mo{cmMz(0&s$D0emn876BN*8vtz~tzlk~$MY1|W1sGr=^x&^ZF z0+>Gk@aHu5N!t47^tF?;xy=I$ungEI0N>AT08;=!taO`I{K|EHxgSsejXxY7xq@{5 zxK4VS)Y4H$8~lA9yf1>txDBuZ;=mQ)I=~OexmbFD^a8*w$YakZw$?T-ta2b{Em?0t z93H#6SU%xnRdzHN;A0iKa`nndxsyIS`3(TuT0i;%hqmXpJjgo$xXum$c;Ub0J&Xr{ z$~pkJ(D7RyM=Ai2O9DVe_u~go9{l`0@F&h5{y%zlwh|WlQ67i%CIFn7!eX~C0{~$R z031hSu}A4x>~RKIKiFpp6q-mX_`UJp<=uF7BUrL`22JuU(h8bz5FRQAt@v`<{-j9!S2qg{76X zjjf%ti>sTvho{%G=Yc`NFG50NVqe9@C%jHfO3%p5%FfBndtXvoR$ftARbA8A)ZEhA z_PM>IuYX{0Xn16FY<6ybVR32s+sZ0(YkOyRZy$AVctRHrfcG<5;Lo2aJ4F`-NEa?X zJ{~^d30*k2?%<6_fq&-GRRT&mO~MCGRBS>|iKuTzzbpPk%r2~jpn3SD_be@k$Sf!F zgtQ-&{d0r`{9Ba$g|Oe~8V7CxxIZpDTwJ^}czAec2+n{P0TIE;MRb%Y_9$f&+d7A0M9({C}Q=nB@Gwd|@X+;V6b314!|3KrrD^0J6X_Iz6;?c=uSa 
ziNR=%p|j4dxbCXoDm_8bYiCKT^i-~w`57YJ+iabCM1#Rp>OrsChpeWWs_yMKA^tsUO9+Uxw5euYA zL$Clo16UKACKe!(ZGvtugYV2d)5QV{D!`7!pUwM|dw*i@&!O?BVEI!l|EV?pyLGN^ zPjH>}JCD?jt)@Fi_uO@VA}y77w{*2+YrF0m5iluFVQEekp3BbTe1l1cTJr`A_Ukbk zCWT4^)%9fBanKI=v818iyWvzH4bVz(?7=j75n@sbPt1k%P=NdY$j#d$GJU{fv*l_c=KQb1XL(R5Sm5SR6c%`vABE8c zoACP@D%?ZGe`%pN%3iUpqV%x_CRyI4ff^a^#~#0Vr@kasWpL*>+uJ||zOm5V{xI6l zfX(Oph5G?#OY%u|q<6bUD`a@TjeBg1sOh=G6PzDZ%BrnL<-H|02Rj1z-*CgE`k@Q? zs3^?GBtgupRo8E>%SjM)9FzhJj8MsAfxZ;=6H}S{EjhOD-+&d;d#|m<7Xp4=7W}$s zZ9{dJW(8Vr2>zWo6S|_FVZ>|+7*BCe>qvD-bpQz17|UG5aufNPiM1rIkR53{z4D6c z+JH`d4bfc+$HXm%uO>#<99rb=bF|(V{|u8rV1YI=1xQGe_49J_Pz=$aOJ}l^q;$%M zAcIBQc$%`1?tGyTGq-!|554fE?Fap?dN>;*yHj#dENXoZQ`-#RT+YkA^KjHUr*(y+ z_x;svW}`S}turS1@$L`Gcui!^Ua*Da%yXp8LZfH(%*uizL`LhpEuCi9n%wt}!Zgmsg$Uk{ zc)P`?k;->F=0)FvnpF75mrpaajPYTQP^!qgsIhHBME!dGj=LX=k%qtqrgdJ!S7prO z2Hiu(7yKPm5_iJiCsXJ0D6uuz!%>n*m40m`^o58TuV3t(!^4)jUL!3V$My9>SIg?^ z2agLrf0#~mEaH^+?G1Q=7tfu#i*Rr9V?%7f=T^#1&eTcgOo@h+K0A^tCmLy^3Nc}d zTi-2d%^#3of39v@4GYA; z=46j>ZQzI6d0z?N^~zJ@VEg}#v4H4$^cusv+ETUcq9zDw=uAEyIdn8WmbZnpxSC!; z++pUnS3R=7$9n+p7)l=Hs_h|a~?YO%`>tm;3 zf}*(V>M9hI-z-hbY#Vhhe~+o%Q8cORgP?h15N^+qEG-S^zoKg9Lp8i)Hv;0F(Z5N6BQ> zhwG1g$qatHe|2pxf|q$r;Y;kV&5g+pqwy4c-v7X=x;u1FPiXwA!P(me`fXK~)HCxmLw9lHssCqgQ_pNH(5{DMIesc)cKp-8+Sj!| zSa5(B(+_)g;NK7?zuL9D-7;69?QF=lWB*;*W6qd*=DPWLI&_TSMd%2*HYk$~cq{p_ zfJY97^B~pK6SHWB7DB)FVFIO^{0#I6XC>`WmB~xmL%hlC#WcB(B%IWzaAK@8dQCx_ktAxC?wFi`1w*1tK+nefKr=&o}?%&xw!QAjaNI}IT{!aR@8Mj1X- z!dqw~Z;}3*tYx1lRPwvtRbx*-%ht3?Xna?tTYvcOr?y*~+IWT(OK%uVZIgPt9+z{x zQ!g;`rBcIKr$aJ)wWn1Zs@1oq4%K$`d@mmYmGG$qSIro$;?i?(smu}&gV)88)~aQQ zabr}nLgbom|IV@2x+7~pA1$vpiu4_S%LWWTRw>7BA-z>$zUn-6{u*TM#QCklpWV~H zKBHNu*GLB~(}-}{44fiI?ocShZWXQ?K2H!bYO%Cwe~>BfB!YY-F0QG(#9t6-8EKN` z6mX$vBhqY4;jpL0FgxEVYZWqE>wP4B<;IxW7Jq%+gNE_<^~0m2riSf2tcr1Z9f1p4 z1_3ELKRG|yt%nN^jxS7&#Y`p$j}4=-0F&UiVLkKjT{{q%KU(cb0ez!iMi~C=5x>tg zX;YpW_1G(W(xHmIitNu51^t(2Vw&$n$F$t^O&9g)czbSoR>y z%=cWsVP$H+riR3d>vgLmDK*mgueV5AU&0fQmdA7yjEmVXV*&Sd2}3L}3{zj+l5tdr 
z_hEtH^?@Tgkhy4Z@CRie*Bhc{I}7#h26d>`6W*?=B1)3#0~Ne*Yu|^eK|Z&FaEit@ zMn1c?FP5M1N|2DYsBj65zy`+79Q`K#y?PN??yA^|rKdGlOliqzbBNFRx)2Ia2FU=n zasO^3Ji}LSEana+{k#rskA<(*u8%EGjUTVL>ou8`4!2H+rOFKPm6_4XlUC%3WnJ7? z)QF_NXsE`gBrfl_S{P?MHvz+Ul)8RoBdr>yTV-ZMyZ=-H2~Ks?FuUvmzo5mRYF1F@ z{A(;QU~Ym1`U?lWOgz&XWqeqE!UDMswOAm>=>1N||Ck@+U-2sp{)c)BREYngqDq`M z!|}sK3l^hHX4my}5WA7(xocu#`~4GUMp>PX)pFcAltDxk`hpzPrl6k z>$`ux`ddf+k;i|s>5ot>!qY`SpZl;Qd2fJ|3B6mzy$!>es~Go@!t{!5F=N{!_XE4y zij8YMciyz8l0mk}5f68zBlXxqWBryrGTsilefKc3?|81wY8-bnHI-J` zCk#|0;7>NQrfffLbte02AHfbLb0*0;RlP8>i#6a7noFZXQy@1=5z9@}WFl)s{G$8} zN1!De(qz)f`KCf^(_Q>-U5^v5ovC$#$fK?g2PK3D=I3HO@a7y^)(6p;^9e`lX0+o| z5qIn(zf|C92N_7^KfHOsHfexMnfZ3&?$vQKdq@Vf{#sX_Gt@*@Qjot<9l5xI1;}&e z58PxZkcy3Fp*2yhg+_CilQX8eW?vVr?;Eric9HEamdTIJP4(fp^M{R2s+K5*uC;e; z$T3jW)RGhXNFr08z4KUBLqYPwNeYK8n25xEC+wA#w7vZFHg#^W(`GBzHW`UXiZ zm-Oz(uC1dod@Vm$Gsz@jWD%~ryY4ySJv~D!$I}RrogH2D%GU;bpF8~pF47Dd*7*g5bS4Xvy4_Nj&9C6%L5R1C| z_XG)Vf>EL%&0f##a}8S_e_tWXj0^)4lP@gA)b#qovrq5*-Yxr zDG@v2Xl3I}1Q5^FUk(M3W~aPFpcCMfe`)VMH(m?$4^AdUG^;G=IR0kS2S=!ZJN2>z6E6a7#6z-Rf+Bdq_>6umu+>`s{{Kw- zULpj89DUelKkuk^Hp8oqa3vW(Q*T5 zwK-|oTLU7sjQq4)yh=mM8mUWq)7FqDU%C@s3QoQD$9)Ve3{2agKtHS8Bozq7eOYwL zTk`wqb4Ht%Z1A1Vo zI3~jj{px-jGS_H^L2LJyL9e9G?4@hT^IQJ%Vv8KR0{*w`QBA*FnK%gTwT< zcEfE@pt1bd#DS$wi2LhOI>~vcusJJ#K0(N?G~3>ogQ9v_Ja<{=3y!N(lP!;0EbLcs zOG3m+Ge{#};cSTf>haeL@0nwHA(0kRr;51)Pu@vw=L$mM;U;hG!j2{h`{tFUt)^_xwPIYv`DTdxVDRe^J zU!&m{>i&Y3zfkwrXgJ;MPx79Gf3wx(HopP^@H(cM#{#E1It!!?`d4r{DRn~I@6hlw zZNH)ASK2_RpYD{uYW4?B(-U_8vDst4;P*FfgDVoh!8p&a7isnz%1=uDAntdCa!>ck zuhji+jQ*kF|C`YigMv0Qi$%5W`7t0lmr{J~eRE7F^9>EuY}u4;MGNjzaSnd8eDK&C zm+0bf;crguf5j2~p9EI?H#wsJGxO)*)*VzSGGQ(?KX=lHws*(IM=(cyFE7tN$|U8lcnqDfA=xZW`vD9q7MiL1<*;hYh2BXVXYMlI0LZ z)lc{Z&`JVlVrB3lobZI#oo;U13LiZ@0me*7pepz3!C2U)C68NR ztn1!o^t5mA5!Jdu&!KGM(s2fSVzlzOU=Lle7*LQi<*v(L3@9ii(bt}ByIIE!eE3WM%)0+De zU&FfwBy|oo2&4HB8)@0AW5c13_#c&rxz(1Y{Nb~O$y)qH($KB6XSAu3 z13rXv5wde+$3t-MSmM!>hSXUlcRe-P-+~zdN^Uuc}G1Zlp2La;v9f{=-IuZJ(Cb 
zqJzn!9dB8&gcbL9hqOB3`URp=itNEc>V3L)d%Z;~;#e>Qo@A)bY1Zsq#HI3j6<*O{ zYuX{TOP4OcYTOPa3m4p&x9U89t@X)`Yb409*$kAwhmOxJJ6CP!@oQF6=RJ5SNsr*Q zaZKj2o7={L@RCnCa=Muu54E}mHnmCGNava1z;3=v=+NBQ!Z*E}{YNTF6mv0>ol3Lb z&ovk2d78y$D`S|#JOblmKKSUA4_q*drVE>?un!LC@u+*5on&bh*7UFmR~S zdXBUkya9b#{j#%?NW#&D0mGy370g~5%TVk0en*{R<85(EEXmzl-5zR_xu|IGTtgJ2 zQL_|+AhXwXXed-S@i3) zix?VZdzwR!P(1r<5gExA0hcLfE}heMWMn72cAaJt72LwW639n&4@ovQYEXwT@+>OQ zw5X1L_~QEAn?Y_m6C-yuIP?~zgOZCTRvMTP=n6zuP)53K&0wO@=JlzbFU*FL1}HZJ z%Dd*wR*z*K@lw5%2|3Odl_fDlC4Um~k+1$*2q~Ks)by?wm|MP6J<7jkk<&y4f<t$n6h>rG;ktv8z6Y|dhHa;iMS=W6XSJM3W;bEeF@Z= zOd%S{th}i}?@ag1PsoQK^-RdnL$Jlm5$3VN5irE~y6bBs*VWqB#M?K6<~g0dF5~Mh z53k#1+IbPw&OxX~R;?9W`Ux^iMZ3z2<1v`-dG(m7FN&FJ&Muu0hngG1%Ec<}HLU5k zr%@M%*CF%p?<;A|$Oy__!%nr-$@R-MHW3A4F=Y}838IBxt~Q6U5lp;Ld@Yng0ps;e zm@i5T?)l77t~)xug9T(05)aR$d+awdvxlk2Dh?JdIham5uCrNhEhiD+htSfCwO5|? zf|;-A8*aW3fl+ywDG!MlV0x`(s?*=gwBC7L?(H$X__*LohoZVyB=6StEGBu5AGc?A z`8lLeWL<}&-*3_$0wi6ISyeLOe;b2Rrl_#rvCn;Iz`<=*X+X{G7VaC1*p`QqV|o}a zr_9)UI6!183Jv=tE8}L2VvF9sP`M?1@E{ruo&pGFwg6=tUlRjgI+ z9<%Szh90#2MPFe7dT{2UC62kbHiT&b*CD=weFvXmx&Av<=j4S2qUwZaqo{0TXodyk zv!+ANd@Qm47_m^fwN3BXJ!LaOjvpW#gR^D!tNt{j#SFs{RX#mL;I0Bp^K|m&2Q2W= zH~%^oP~4W9E^|Lh7Q+JNse@|^Qi7zqJR)3%V(z1p-^cnU_qSy1!%&!PJi)xMte}*+ zd89&b=__ruY1Qn$?c)8^@DU_5vh7%Y26QUv1iG49`m7yES4I*Bi@rnBeZfp(z6N)1 z!-pL(-KAS~J$vZFjX*hN|yIgfb3;&}hG$lFs=Oy`d zmDLkGSR<9QuC$k5a-uF8SEpeL4SeW9sswB)!P@I@sSb#$F{+m;U+#rnj>H0L_Po5w z(Z>m-`}^@2W5~(2LZ+lf1CIDrW&D`3n3D6Kctf>p#}~ItD;JM#((*2F(Fsh)Cx9BE zC@niYfbWfFIKphJS`+G zbNJP7mACMwwFU=wy#CzFocKr@^9d7Q!);?!EzC!`7sF;l>r|ruO?+9occT!~C6HkY zaelBjTJd?Ykh5N|<^A=`S1|7LUPHKQ-EE)ydYWI|I9CHf|k zJb3eR?U0oAe1z;x;qr>4N;kLumH2(uTO4y2?)X2d3~}VVvcT(LOJBJo2E8D|KaG#z zAIi#=FI3E5J+HV&;(Ko1u&SiABEahCR&sWqgGG>XV`p^r*LO-^#`_SaKgGLCQ zN3B|)D7TKu`i3Lrb(#x~r1##;JbN?z+3z|k7}?Wk$}v>OXgeoauMQ#l_HuR8FMfT`7pd5c5kN#%Je1HG4i)((0w>1;=60x> zvZ{I|h$v&{>j3G;a#18QG6-A8YbwWlRmDVJOKomrwH=q?2~E z3_3*_7xxV_ojV)wQnx+#pL>&>0{#f_)^4WnZ`&sa>_^P;ktN1Qxvlrxcu`-)KX8wBOlcfziuLNVm1v!tz*F 
zA&$N}4WU2_e84>7_0>xrYcV(n(|f4{<NRlmJ&)Alm+&-7qub@w?q`Xf>$0C238JSOSAVvSd8yj`iwaa*LVVaK9L}>&$(i z$l@)X&>TU)Pcq9AB_*Bfk)w^24HfML*#^c&)-M zKJRx~{EKUY*+!)^S z`nzkgD({PamD>7Xu^|UG<4@8h93gyO=UyUTmVB7$IOj?7v?eIf7Rd$;VHW%_99}W@ zwtPUe-qmGN4JMKuxz}DOGEU&%Mz5MXfBT`EPv_as7ea6ghWGU(3=QDbW7E$tWtX3O zCWZNG9UWqLVP=W@H2ckFgezn2FI?^Gm1+AMix0+LJ!NX9I3Rh#xTs}z^~!@%H|M84 zG#{kYGE#hf1S;tV8HU#zloUQ!PO(pQMdZg{174~#0)dZw2=MXq%^&B_E;=Mo@;TGo?YQ8#zO|ac)W)8JmMJp}qVxL+$?QM2xtA7H?!i~C zwCoufbZAo@&nWLo)H>OHz+P=$pM_&qpzmxaHGIU60aww7Co^nn?j1~C<6nwBnuPaY z$Py3v=LA_$ijD5^H6S6^zHh+HWAq>1|FU^eUT1_Sz%9=e|M0_Z7GgOH%{ez-TT6a! z)oE&CBvVU+GDt7tvH<(T0-Pr5$KZH zE2#fV=(u)lCtsg35Ah{!sT)EKU$gV%)`zFSa0jC+OZ(d3%3^0C*I}Bov5y8qw8S%J zHLrAiU&3lpnci>Inlyo`B~DBD;^WWyI~{S|pclD@1WJwa+HC5a8+TBdGNCE#8mU*- zDeS5;Su(rARExV^d#%OKb^@i&g1U*OqD`FRw-oKVJZ&`O$norMRp8Ch!ezb_XN^yz$qW68K;}h2`YiwZIR2)+( z1lBgUa4uP#yW<_bovb1?jERppG}{=-RToN9#hk0jT7!U6SymaD5VYI5`F)c2ZsgMP zcipiU^4)4#vrhO;thhzykIb{Lw<>t1QTSMaaU1+9%Zffaj}Pd2)Y8}zLXjdz6Ft_$>ePF-K0 zD6?gw){Rt)3@@mLNuxq$ppk9GJ0)P_8u-64BYd0tJ3d)Jnd*LVMHxZ!Qv^EDL=qL#+_ z*L1EMvm`nrHyicQPK7=fk*g-grYPrT8No`4+&ZyJcd@Yh@yKy1TVLtLURzV{?p~30 zzB{8^ZrofgemraQcLw%3} zjam5au`e*q!{yK%I8WaWO!T5vGwqkqwsb5um@hRk9Zx7;@i*U~yP*$^y8|i<*1T^5 z2$#89#%;^PB3QJmN{j1iJ|O%;oXG|k=f@`Xdql@WxQas_y2k5$q+}Pl;s0J4ztD?0 zc}9{24>jBrKg>Wfy7s}NOe*P{JG*h1W=VbG28ZQXx;f8>B(hp_ogvnCju%$4k6&k?=_JjjLl#8!96lpe#Nz9wF*-!dn>zVVJttvuS@v4%h#o6(D z{^LwWOpoO@O~ixiG~S_Ls%rL4m3dH0_Rb6O!g13iWlsd90wS}NqymUwpQGqa?GMwM zWzYBIe~V{`%y)Xf`eCBeA+t?{ZFx7w3V5k z-r0%eQN0oud|0CHU>qQnz!OAA7%u$~tupY)=T@$7=(e_l>3CZzQ;h?oYE5aS{R>0O zr&dOy;iINvD5lI5wV}nc>h}O1+M-Th=;h&Q>wV8ydRtGZVJ2n`2LF7h*fdQe!;C7y zs0lVD9cLX+Tq*BtxtMk(@NB-4Tw6!hE2?p}5?F&cn!CTC?pCg;M{4&qDITZXl!ezJ zfn1FDaQl|7Q^Q^{@~MP>;E~}veqBE4+~W7s&~AQWQMq3K+(23G!T{a9)KPXfW%lSS z_z)yO@$n^-PpsE@%am>|pb!STf-ewjELQRJ@+Af?>$IC1{V+j3QOWA3G2cczAvr8p zZoU+9vACZ#e@$1^&CuvQd>PU>ExuqF8PO1Rh^b}YP0&HYf<)BL)M@6~-EGU_ZKJQ! 
z-9BH}SHPWNHK(fd>9J(eVqt^04_n=^UXxp@P@mO+>9s7S_${`Jca}tmjw*x`E{tS; z`24v^z|U=92SR0xvYx%S-`A33GbF=W;wiFNnlxpU;|WxH2UaN=3eyX+UwZBsYmTf; zi}%5qORo02Vnauk8{#T`t65Q_M%K2{I;NYXHsd0}q^Q9~KF@gx7Z3VH=c<>=U5a(p z(491lGStXYD?Oa!Fn1c6utK+Oy|$mIn0U|UlVYNlL4wtI1MfF*tKX39PL4Xv0!H0$ z-^~*k+!F89vEE4}qi@N{LX2JjF;Pd~)=MQ@$YvE7${a{v$KzMHs6zKidu#B~3 zmQgN@+IC~w-Z8jWvVXYz7Dj#$-lhS1Q*(VP1Tl*Gki%Ut{6zJwFOxjAUZ}6q$pX6` z9SlX-B*TRidp1}2WW;0=^~ji!)8|5;YLuW5&Body*>eOj7d;T!7djMS+n*r9l$yV( z3w;{IMWSBj$RCUQ=YuFV8Q2n4%zx!o#YMFM2@g zQBk#)!o2`VhUId>0JF28KUz(@nVI>SzS~oW^iXvDVtE1Yg(%Ccy6=c_-1p+1+<{RX zv8GRRQL1w^IiYp(?`_S+P~d{Z z4PPoOKrk$eE-Hu&D-DmGrhV8ncQ1_Zb56GQnJj>OX161 zFWoyVxH2?jlX9N6neC2yW6!2};-!_W!#{#4l zLq&CR53Id2%*=4Pe7ST6M9N>%*`E9Mz37s<$!Je!r|{K<7+-C*;)=eqPT$L;Sswfk z^`mo=R6|s9KMINzY)P8uOVV=1D8HW`Nu!nhMh>pzj)BrVZZKp{7nM8xzA-Va zDDJVeL>=ORaS7rlaS5(+)-1VbP82j> zxnRQ1>p^YJaDmyKn(9=~?5d-S#YCnTvZ6<4_3o0q2;G3Blq{4x$jv~A#!$JaseUz8 zhbKyYJ^TZAkRr$#fa~5RD;@%hOj7NXOp9f+Bl5YnW1JlgG~)yqf>9HD>iH?>*Ci2X zkPIPWGkC*C^u!r(zq~GjEnbN;MlxgZ_N9oHjt{Su832l=AY)40g6hb9tZm?ovN z@@Ja;8;ddgDs#5@ZthOTSMN3p;g1DIkO(Ok*;Rf*OwxDzx6=j;QO_GL8G7V58)gSvXHDs^i94AnI6{^y z;=~Nb$AwBb-*oQ3<2a8FxJEtwT{cX1hY*ZDac~CrY_EQv7~SUm40<@gjXRfhV->d5 zT9(H%w*|O-w}YykQuD^#X-o|t-raXRsEf%gOe@Q=6zbm zoKR&wN4FS5qR$)_<^ed@A*YbVb?RBhIs;Qtx%jp=ZWH~}th83AcE2k>L&rwHp%dqh z(0D6yvg+QA{`{XPuiTSAQ}Xz|1Wu&i{H$VQe*M}0COT8H-t69;0|p~+uKxA0F?yCw zDJSZYg3;@&xKEr-G){H;>AEjXbUyp&hgab%V+18ziI$1w*cufwXv`i{O~12AOmbB5 zD#n60V7evgOTP>?ny>dkO+_!7DMP=-CyJ&~E@!;8RPmD53hO*2Qh0Jf2J;P49DEqZomjt4r)OZSbEdjC<6COJH! 
zy8^-k2RvfwG9?PxGEnm+e#sv17-yTWt{hrevg7l_7=+bQFr?#lx4e+op>KR&1oWS-IrDVJJ8Ax7sNu80i6mck z#p=OA*R^DB%Yg#(cwX=Lh&#oKGW|Rm(Q)L(?ttgVF6X2ie;@GzRHYg+I~&3U?>l#- zAFQ8~Y2Z^@nRD+p6I6E)=T@Z@KaUgf>i%bpQj6qLv-JE?>1~I@ z4-R1TL1Dr|%mKR?3!%+pk47ky)vB7Iy|WEzWdfxfwSc+6hw-|g3ze^5R!2}58^@^I zAc6zX{4;7o4qS~DY#k(ti!~{gb)_0kSHHxniG=rcvO6WX@%s}z1{k2Hs!cofY%(?} zn{)8&(JWsBsgGz`ajZs7-?Uz+{mwv^MAl$;4o9Watv!#0=uM8eG>aQN`IPzL{^X)i z1C-tjKcrC?gIm>~)ZbJw9x$SB-1ES(_rv{OrTCKD9D&c6dS&h|$F1mqSY|~%1GgSB zVrUZfXcLa}j^mDtm!Nh%d(hGYEMPre`&|;6>TtAs{zVOmLKvG@yX z(J>5m*=1+V7VUzCnaz#~vjkVv8}hIQA|DRq!tlN+AEG|xcq+@0->S+3DI4?l%)OWA z+be;H%==r{^JS<|vWU%(V;_d4c(dyR4-`WU;+JNVW*IlaQntj?79-iV-91&e+R8nO z#!rjaQ^LDl@{xQU&XT@tH8j@XIiR2&{L)CXRwQ~@9Qg8&RAz6had0Oug zSUuRLwEc=^MS0UEn5TC>5M4|OJhFQzS=*TplR!ppN-XBYjV->KP3RV&ejoQqr_KKF zD|!{(@x!%))yuGBA2|Oxdj%9^)`X;m3e`Odadh>ZPofarc<1SLDRfRgUVkQX$$%2y zN}khPhlMB7|LnjA1SH61n#Rsltg>vf$gWrse^{%Xqlh(ZqlxbI7q&3jt4fV|PxJ#O zh7wL8_p~Gy&2F+gdbI;TYa=1}d={aQMxvN0_vld{J9{aSN*r%p%)zI%H%G%s%12T@ zJTdkmEk!vblWo+(FI>BH@#C+VQ?N_Bg9Md1jpadp={LRFgevjBnEAR`O_K^NW{iK1AFTwv5z|aXT#(PbAgv zlz*o7u7_JtNb>HG*)J88GzpGox_VyPW^;LPG3!ROaE1JVG0j|!;3?`)k3ohiE{90N z?OE%l{P9l7_A*{eF$wRLpQ!J*I3Bu$m5r(4Qc!>8K8pTako)BBPaMNPIUh;;T&_u# z3=vry6x$N{z~R-9by&r;A8X+G5ngCL<8;NZYnqtfVZD&G zY>;xSTRw>hT3PN)t(EUc$zio^SM|{7F?~DN^~?3pe=ycn6Kf6X*b$k+10OSV7l$YcK`AD7XQ^57`$(ggyB-0vsa0c>XqghBq>vR z4`G4LjW}=_ZwK1gIwbq;w~XDwD$N>yA?U2RJls)x4G_8RCN}z$} zb1=s|H-TU??O(y~E;K++8fPM`FLs_OL!M#nuoi&GsN=G%rNGDKwEoS7R z`C+P0;~ogS;t0ASW55onOew0K*f53!hO`Y)CkN)}lrG5r8A;#jr zglXVEiH99X*DqN8r@Q5MF!`IvS^Wjt_p!j2zYp8Te;F$Opn8zgKLhulVa(qh<&%Gb zHOoQte>sGye;Fr#uex5i$S+9!9ZLp}3EP(Sk));!4G2s|hKYu~^n{w*P}IffajC1H zaVo0c1QA#qeZ&>+p^;f#a_IKsvoZU5p-2tfrgq8*`+G2sK@r^|;wF<+gAschH*mklh(Hzro~ps5VFd4vaGq1CTDgN!1#EE)a>Z z6KOZ&CxhCTeJbTl_tX-R(zV{QSZh4mbDWViChrpcy0?)eKlimz-TS8oa*rtA=5n}d z`smUf?&buSIk2fFht;aNSyHM*vy^>vzhYR%9%QV_(e@}X=xa5Pb6a<+1cCvzIbYu$ z{dUcr;Zu!0IZ8Rbx~wv(O|_rTz_HMQe|c+2si=sPH;9`x?O7){rHg19I0J=)ym<;_iU#v^96e!ii(RY~aRlynJ^zZZKHTe(}5U!1X6 
zTj|oUvxyqtl#HUs4AjsCtbb?JLJXCjncRrZU4=L}!Ill%S&cm>H4s)6b=3b4(|UQI(p(hRGWJX?Lp>^b5wU5;b8K(ecV> zi*4pZ%pKmv*^BHbgN`v1IJrQLQRL>~^db6?MSdLp2+@z`u0s_{QE@iJQm2iH@t7Z( zqzqP%N@u1kM$qbKc2w-?v~{IqGHe%W?jQIUf^kTkl3Ffbmw5dC9Q0_sh(h2p|}smHsC1go##+|DQWb< zsI7EkEfro2{hU%aVdoNLL4aIw79N>Oi{xq=XL)gVbZWfG)zdsj&64eSJ>3bWewT(=`` zN-8IJKfAL%f2}e_IrdAmN)vOl47l5jP97y^JO_^d2XxnGFwnwAGL|0#HkF@o3?5Rj~8%tMS8HLBx9(^xcfyW*1LBnWi zsZg)&+$b}dnTJKXNJaQcMEdMPEX-f!KO%a+|Gq%{>R48gGKs<1a;vwrdKw)1Lj4wdF#)_?z4RkG=N}YhvBoMp3XJP3aw!Cejos3IP@dWQr;k)DJaAjC7Sz1QCFT6g5x=l#P=XMAQd^UOT+ zYxnQI?WuPpTR$~}v!dVIjDyS;AEJ=VxIGlfOiAP?07gw}Vin~+y55D_M#}ri$Uky$ z`_Xkh{5bRexlnbTvPcCrCCe^|LlF}hl4V`PEQCpi)TYhMAdXA+Go#K|RH&bP7`nwf&V8*e$!A;~qKMl(%UCiJ zxqx){RdsT5uYaA0nQxUz-ngU+*2$2+(z|xURxr&lP$tmPm=Rb|X}SIG|?wU9khyS~B8 zNIIQrW+PUCOYMblfS=-IAg)NLt%van0n&F<$9nlmal!HIKKx*!_oQfrxp6FBLx8{l znY3vRno5&8%l2i(iITQ~t25jz3TD=8t|?J`(?{6UZs*Frl3yFR?E3ayY^N(ZHHDj$ zMyJCCT#)(%3@`0gvC=JYI`2D+UVEmf z0p3~VF>lj6$V2Osrxk-NNSo^24wI$r7y*))@lPaNoV1jptTx_{Fp#K5=>=G*?;R{8 zxGSe<&v1F5wy|Ej$YrE;^97vb5}1-P&qnCp)CNEQm5Q~Qi01W+myfFl z*cwBn-?jKGnNTbZzdbR5VZ0ViQXIc*xOz5cIAj_bs(R&yEH{3tVN)lUy6qS`*hw0(^J%FZ#Zltv_vnCN=PRJ7Fh@t#sJxPa?V+q|<)Xa~yi1i3??kv+&h&Wk?eik^c-yf?$@~b2-jF@s$l|b%w?f6)>V+(G z$kWle%igVbc=Q}TU-iq*Rks=1)>9w%<(kD&b<1*BU>~!yU;uU~p(>k?pOv`j8T6PP zYrqN_yA!1Snzi0^2rf~gFVS`FHZliz2FZ=Y%;ux&iiqrz3e^=i8()f(Dm%$T)%1uH zWeftoWGAUs3&;Ejav`ZmCfp#`eh?Yrgc3+;u3G8B5M=!kwy{JnU?~#6t;MF zL+y6^f)C%RJ!-<{_(kLczOr&rfu_a^(6iZ&s~P%7x3;1!n$N!ADMSJYw3fbP7?K18 zYZ|Fpy2>f{QPy)id;R6-{%U>``tl*(>)zc~8^*`vP94M zc)MPwK)PA?qNRzrMY*dsCobRP9KCYgUF^6Tn*F?9E2D*DRU#U~KD{K$zQOm+m2W_^ z@B9GM{w_9W2AQrN^j+(}Hmho^Fo z#NV4*l_p|s*5p1DpbhY_B)6*G&o&K%h36GB&n4#HK`5h%xHce)@ENwxi?D z+#E-JP9#;hH!W7KHAPx3sD{}!gD<3+8{Fz(<70o)xxF&*vBGGT;31To@I0t*3Ssdr zNsf~$T?BhK43w|y6Ms9I%!YM>Q-eTtxw#h?DKZH zD!uX?9mr$0jV6@ob+7R<8LFggBd7D!hxRoB->lW^jN>PDwLxweYf?9|Y2$6(Q|@Ob zDqN39u6sXiWlaL{4WaTW=DT)u`>xT|(&a_oVdB-{% zCA1qwxi!)E>ms-#G>u5PKNr#r!p|*=1lB`>o4N6d*o5?a#2WkcGAu}wY?ic|X*=%8 
z=Fu~c!!Cj)ASvNb9cR6N)+7X=C@Me*55DQA^?RX=9mUW(1G72#xb7Nw} zhI4evsA7iuNDF*x9Hdj> zkdR3iAvb+17aB2)kwi>k?zc>%{M^7{6Q<12dnhj}zT-O|DFaNJ>*v+2CkSxZ#-XPm z=X@3LFPxpYphNcjGaz;S(CoitD2%r=fMO#RP;Bf5c84j0{N=+zXO}UP+9(k zeAWLdBkBLcd~LAL|KG`1@jbnVO$7Rww?uvwQEcXmTAlZzm~d!9+YofUwCkNzYFET> z=_-=bd}Ok}i+2eYVu%CWQBu8#OF}Gw_w^#kNrC9%pwD(R1lQYP!`*?w3_vXRw zxS2T!uBlNj&nS$Ap}L~d&c~Lx*79j=Z&}bYpvRe$4gqSv3Rw4s8q^4W`fWsAV+sUt z3BH|rS9^}?bG;Dt_mAn8m!9mdPs>_Ls)94R+y6l6tOjws6Kn9AauBxLADLWTG5=KR z@UU7(0!w@K!uyw};ZFm;>mW$6Q5|STgbe{SF5XiaA^mXFdA4?+aof~AVtyi?ZVo#q zHQVQo&H5&htsm#QxaXZu8G%$dK@4JiQy(fcF&xwYnIS{S=0c2Mpw0ufTA}xG2Vyy< zX197T^KoB8G~chq0bx_8p*k!VXCSmKo+KzNBG_+eUqYiQo=;uOQpIw)1S^3QDqsBC zk=K8YwW+2%iWyg7Wa)jUu+S>~+RoM@#yQM3SKn@nPrs@F(jP_s*lQ)Ke}43RB!8Er zwZI9X>hzP)3~2DA?8swE?c}o_Q$-AK=!+{oOys3a!7rl~ytyz(sG837 zt2PJ*?8Rb-fT)lIuLd%}@QS@2iM>3~vRgdDFIPzzy_AsC(d$$VUQ6dU@TNlfygGe~ z3HktE6w(Nil7W55oS?Q5@R6Dq-eI;d2o!$luFGPk;6cr;)F3TR zrUHr}Kn`^qDgYZg-TENReT(eeJFlfp14+d7A+e=z&!#7; z0+MUdo~M&Nvcj@*n7j`baSsyP5lrI;Y5-%;m&Rc`U0r3g%jJ^QyGl}aFQ_#>dZj&V zEp(fN&^5}G;1KSOX8D`098~-6BHP7jGc9U`;2ygEZMwcr%aWDFYqav$qKucFJ{Nyk zJyb`&RU=sDchWLi_3_{N5Z@F*m6rsUU9Vu)qTDTC*D$=;RroNhi`4j2e;PqMqSxVMxpDhjw zDyq8qEX=)hs#XGvtk~v151SbF1*veE#Y(h(yVcIQ5P2DGjK%pcJm}O25gU3qqcnT{ zyN@jJnZUq;Buz*#$j)^3S)7!YkaW8!ZxrjM{W@JRmSopBd${Z3jnl_5K1okP_UQRz zYA5>Bu3#W|5le4p9$-U`bep@t70Y1886`HjYp0e)HnR7y* zUOGa`D5QTS|MFD+mA6aQam*9(81R_y@ zp?y(ynDOR!WOavo*UqPXrW3tXoZ+a>IDyi=LTSX-0zNKo2kn@s2^`sxmQDg6uw)w~ zSa4C$z7qO$e})04DDeIr-U!PO3R26M$GNTxKgYdQdtzjBs?S}kD(=B{j@ozK-r&uw zL9kjMl2aAz4QE4U;;S22=TjSJPT_hu7uit=wPld58mWEI?CVO&-XvR=7p>$eWi>3p zjNg6A)G_E_X9s#qPiUwZDPcU%`m2(ookW+0DAR-2cBORhIQ_rTVWAx?=~p(8%oZcw zD%guZ+G>((0+vH}yQF8$>6QDV9ItiQt@tj00(D4`54+->nRDagVFwVTk^ z+%{x;doJJ75L9nm|2?hxqB8zN$J-}oS3$O>u}?u7Jy$Z(ln#(Kb-5MXF=@?U-)bU~ z_9u}Zj^p(PgrS*X%@t`siPy|q!&HqAM0=fnsGxFuO)y?d^H5urScw0&?(auo85Lr3 z&VizJ>1N|2Q;7iQ)tj))S{0$}=)3&SV@P$SKZX;JU7LVQd(&jQ2QA9I6Bh}Uy%9RS z1Cl}YIB-^k9&W6n4AM|LZD{py5PcvU!$@Fv57Q1)!4Ps!ig&+Hbe0I&TLj*ZHj6dL3dsx(cUl!) 
zSa9KzF^@<2Trx`3s~IgN?#Z01O5Lp&GiOQ=OSHK}7Ht{>);BZIzp4!DMt^8;mt>bK zfA1i+`K=UWQl`ebx6WfS?RVjY>;O##!)1nEm;0QzKC_+;EJ3c=_};-qV5uHq6tj5a z^9QnU&M6;KGXteR@r=EURmoB?tDUv&?ILBcO)=H#*{vVTJWEbCY)*3xJFT+p5&w=l zgse^?Gkw$YCSsbZ+LRoxsc3!_z1My|FZ%fj`&2=QE@w2UN!mEZtOG*l%gE>Fd-Y+V z^-09|S#zF~lSlU4I@;?^vc+^sNy51j+Nzq6jF*C6AD3>lEGBuxN*uLo(BbuPiLkZ9 zlb|pTm9L|f&}wi=xTiDPN6a&Ryi!pmUigM4_cLg=AVm%InrQgSK(~97I9?dJ+;+?O z^N5ilK(~^(;gh*dF1-V>e(7s*k5Q**`GitU$;!#X<2*$FTU`6wq8qAAD#x0|-_qHrdUc9_j@@UbXdj!8TiOhyHxn>9L*IDH0Tw zZN(gsTSV&$5c^bxde}#C>x~Lk6&3S53SN#M*9i?+rhc&6Y=~{-pebz>!e$%eRtbBm zfD`3tYGrk3b91@QVrVhV%HBM3-Z*v7;UPnORcnvgb=|yet}L*%C#?Nc`kuJ z9fBZNL2Qon@iL_)7dkxWVie4W%050B?buU@c((v@Y!Hw*M_gjg5eA_kR!XPQ7kt}t zM8==`SoXP1UC+mKK|13ytyw?6sToq($9T)QYYbi=YrDF2INY;tU-I|wRAA`h2%-TSPDi}uPB^qIS)}HU2}(96Wj^0~ z@yDz4;k)V=yaDP3By%f957lqNKZ)#grFVj?SIFjwmnKZicDnLBYC|m zY|h@W6(QeKUv6igNl}1T3k<)zf4Ae;r-TN_qGw0e*(^9J27de{X?EVYo4kRM^}a6W z!ouFRI;N&eB!C^%oUV)s3KJW!-(WQKcv)2!X|De~z`{)H&6lqvY4_>&(z} zIH+@SGmIXVIj2Dxu{c)aM;G07olg^9SMO(qqG{ITRDN``qlmM25!68R1=>z2en_3YbsT{?{UOi;$kCGkRa+v2ffrwqNCPq9Qg zhq1)xB`NhtuEG`CnVy%q*UHm`8(K}zn+)c}jp0gVQ8&%%T%L^ct2{DPpK^s)^FX3=ie)^K5VL)eO?7tCs6jMi3nvFP4|+; zb7#leaaWZ)D2*JtPP78et?cX*8Gbh=J*Mje2(WPd%2$geFE-(zwmj z=H0dfUz25);mG0!(h0nsNa9+<;xZoq(_PR#q6^??J7fVtSJ_KI6 z^O-zvUiRaUp-LvZ9U-1kyEL+biB-!PT7K@0-adiS+uxaJ13`XYesJoXjoD`|Pwf?< z{eT5hr6%tK(Z;a9ck^-KQWNi(0cXdl2&DG!Y%`g^^5Fg)H{c4NVkWAhCswPXZ~L+B zC&B|NbNS(uH8>mYk>-Y$Jlh7G4mT%5>3pP`MCBvT9VglA7{2#-u!mVb4%R;7Tj2tb zCRUgjb~{?t*onWtf~%WLK~QPT%$8FSGZilbuQuh^P7BM zh@9z!Hzc*r`y2#V0gS&>6!&eN!&Du!NiRf4ULM`b)C?h!e(vGbO;N+dX7d{WH@O1% z-AXjIv3qZOd%a6F-1TzGv&a$dhG+@NOG!6fzh8Xo(NfCLEq8eVD|aZrrZ;gS-WDdxh^R}~l?vg{5?M{}@^VIpNm~JZ$3RzZ5X7!9OmcDc&m1lQ#FE|LpE^27#;z>waZOhpO4EZ|7Gf1i zOE+Cu3U1%CV_4K{8>INQ=kC^8FA>ijK~rJ4J~2dZ$xYp%qCClapH?b;@de|HcQ#dl zy=`O-VK||z$c-(|WE}G~sEYj#|KbD!u;FfYmnzGr8CmUSGJ*PB&5< zSRcXu<$Af_9rvizzI)HvU!@L2Pha92o3zco6`=X<9?^4_{CU-HW)TPdxb)UEN^Di7 zgvs3@Tk%sbx4j!t&#FS#uYK!UG0=H;Z;1;C8x&8N$&z#tRG5IY7^}roHOeju5sifV 
z+a+ApA$AK1e0yB6_xu7Bm003Go@a>}6fd_erofHM4sgsqHoqp@x%c>SAU~X?I)P~E z-k><|hNR}3-8=hH6l*8!ImOoX#)_m>n;+^M&P`=~kKty2A^)b>G=kWUSo$Go`BQPF zkZ`-fn9HetDAKy`{M>!l>jAZ9QCC!ST1 z?O398TNs(PjRVr#(c}j1T@d!d`8gX=1;Lv~l1FY=TUl3_b^H(pCXA1gMr92}o;nY> z0&^mdSb{$asl=Hrc*)pLc4)3Qptty%>uWsB5_AHWG@+ua&m*QXzb!|z3a@BsDr-Ib z)_}W%IobFQmXfxaB~x+Wh2I7M1B7(O+FG*JuSTlD@5uEWADl-EF4(*Bjn-&3$A&^C z52k(E-`Z6pU?*n~gnLfn);0#No}g09HSxm3?Cv-14#RIq0aIb*diq^f^0-*X0ngls z#Q}~7_2UkeQ=xMfCGAMBU-(FY4riB?EGQ8I zzHw)dzWE7BJ`I4Gu-r4XQ^EITmgV1tCzRQLe7~12L?=2e{hadMl_k6!X0WxHseTx+ zvMNBM4b^)0ldo2#yh%m|3A%T>%B63`RXuN(EH5m0Rd~UT<=1Dfno#cxLtiQ^>jmHv zn4LOQgh=N}=G-7OGj<~6urJ1nHGn&6;FL4ks4~OMg<8z4<=!AMYdD0c`O3_^&{>eb z*epv5t1b5h~W}S1{LlQhmE6JoF%S%s#tv%3f{9(G5a@XYpZa_jzG| zJx+&tw3EA>5pn!rmMeF=v{1)D^ZkR3VurVV_b)>&fT}XRo1Ubn5a5@x3^dysnp736 z4_$p1aGuO4DO2Z}^0PuBwhLbDz)Y(M@SHj0;{(oo;VDp$7F^h(-n&MFBK^&@8>7j) zN9t8xH@R&>TJ_oDr`4w~q6}ygY-`v&N5$kmAgS<@!W9xBxm88(eiH)HBQ-Q}+_#S# zhcnAN2r$*7^vY+0^a;uL@W9jk7dgsy5TxYZ^?UDm?NvHKDpH6{}Jx zd!{3aRn}^Cm7daA$cJ=_ok}A)N^3(}=Cn^QT^%xN;38hkb0017ZNkiI9Rk%u(^ou* zu3p}!7<0%(fqH;$W=SdYs4U(@ZV5wDZ5vuiw>vjyrRHBfh=qM7|5koYctQWQKe#ji zc(2=|3RH6*_XwAH@EnXbR7MHzUeN0N92fBTxen{CTlqvp0;a%M6F^}?Wgc#xd50Vg z*M9--;ekeTg_vo2=!xXndvWL)|7am%BO~LN=lDzSU`Q8Dfwgfu2cB&IvF({s47SPLljO2NiMhc445Gd1c+uj^sMS55V`N}!TJuBa1h6V)^^EkH)V{KLIU%ZKXJX|4u$I3J`=j zg%Tu{@VC%`9Q!wcR1!`E=(JgKnE}4fDoPALiLNv(2Ay4DmLre>E(va)15?z@2p!7E z0(f(qfzwA!0+%v&?msvuN}zIH5v~1z74qjFbB6!vq`{?wJ}hg*;r}`lWy=@~|3>L0 zD6t97OaAM~4n(ImBB!b<^wLe|2GsNNTd~>i97j3za&}6!4Qz4n7Jot(4G@hK65h#_ z|7yHzc07xPE+awwKZzWWCtw*LHpo3fkKC?98jvNdL7xeyH5GE~3YQS1peJgNRZV8@3Mc8MQ(VDJPL)PAfHx#r0)P<`*x9@pnuWX zn*97Py2wn4r=1#DxqeZ(qgxz{QOiL4U@6zw6`@8OB7CwyoECgVai5^o=cdk!H5v~^Sgh7}__ zAR^uzTKk`I|1&TD^B(;FbA41h%bmw@wKF(C7hzOZGiqbOudM8ZuKOv5M|Y|!7Cwpn zvXuK^zZ4L^`zw0Ae?`s*qd_7eHlPe0a{wR~fFPqQOHl~M$W(o-`%Liwc_w8%39f%9 zh%SB93BTG%ws6yK2jqZalY0=k9ojZ|1Yn&E9BcWK`Wl#+x}WZWg*T3iP^%$W`XkiE zm?C#0^`;axHR08&qjpe*8cG5}Ewwo% zgRih0K)$@0Hng+qClU2}0#%XX>#wR|Q!gb=iK6(NC>C~}U0HrBqu_L6(M=Vkv53C1 
zLExWsMpHoCD>N$M)nA*N0{IW;yGjd*KMDofHTW&W;Pmw0k|+ zp>@V2fSc6q6?HGF)t*|_D%0;XGaoqYzkR0doiY zUN^xNwI`b=3J%X1a?>Bt0~p}1V3vfCfuf4z6qE`n&rc#}#)wfNnrfIv8UXQ$Xaeou zkgDBpCKy3mWs4^W+;~~cM4cHxMC6{RHYy&WYB62BV~+|%Iozq{{Wk9}v3fy|IIg89 z7woNub2CE)VAOtuihTpgw_MBX@whU-+t9tE>d*s8B6t3gC%(&vzXcCQ&|usmWSRK_ zp^KvFT{Ct89}TDLsv|F(u!&OKC(H?R8%0O=&=_A2;pOun68ItQnfFeXQ{R0%CQ=MoObi_pqn;so1-Nu zBdqgYQ1GeqW@*sZbq==5bk?;*&%rh?8z=*t#Lnu>E@Kso{_=aySmhDZz-Y!FyJ2nW zk{J};oJN@RE-y9JHVG3c>`D&I&soNB2C>nG0<<$~I@{*tCs{P+uk=x)xwx13GUJ}& ztTJ(7kA@mG7Y9sLY$OPi6T&6-peYl9Z{jRHJDIO9uMB1OUh63QaQj~Go-sJFUB>!H z3!E4)UAnG0C+D9#YCe8F2IW%YC36FH!Ct>8X(w4GN75;f_57m`+G>DDiIm>+nJ{#2 zN?NhA8%@E?qGh}Y^nlX|-F_oz+uGtV+L6GUP@kQWNi9m9?KP zM&pc4C`T5@g+Y$_2JY%Ia?k3@BC06lhZTBfJv`zidR_fjMRt(3VKH@zD}((|8qhhc zSBEDuT&#pou}M`%LuYhn3(=*P4%UbWNmlcH<><@ey{uTK)fFi40- z7`-q5cx8tdnUF>YL)p_o8pfGJmV0)~I^_Bc&p){{5lffFB;}Q!rQl7?Hq8k?zT{-B z?45D{Bob@hYn&hm4xUwHSR$$RgYl5FraZ89^qdj0ouaO~I`(pyV9*<%0jVW>#kNFj5ewOyzvjs#1{f4(plt_OKa%1Q(mpE(oJEy)M;bu&blB1X4BHML`TS4_{tLo;5nAnGSY0x7BK-$JX$V1#%`HPkZL7(3q_Njx5U{ML(SziXd^I1H{T?SwtjHG+ zM?0n#_O=w=NNXZ&9HSQ!gYX6!VE0Hh#r9`Ijp8mmHHV6Eg&SlA;F+Z1fcE1sp_WK~ zQnl!iWzwo7TV$oz>IQG@Y~Su(h17$7>w$De_p8t`6X=;ZM0aOnFNy=F(T)H_yEqmh zbcQ|+s!ij|dl`G|LI~YduN(N`@~`bTLDxnvMe>hozI|ffS5oD=Uk^t#J>9I!R~b?1 zo#EX|S$DHA`(a_LXakG@FgWgv1Jff{@#E}D`mv19PonP4$wMYL zJRzMx282RFvQC>GlV`+QKrlB7P()s;z_oX=b6`Y*&>&TmQSG+iHkSp@lz{Sc>TBn? 
zoT_gZ-sj8l;M;=bA=cB0@O*YqvWEf+PPg?KdGGJ!BuJcyiheyH(aa!jhz!)}Gu8Fn0{Pv+4=ZW6X|;u- zChmEx3cu=c>ip@ocP*xcO9_(98I;OT?u5Ty!QQK$bqw0&js+k5B9RUD(f_w8ds6I^ zwmlu38ERWZJ)l$=E#@Q7X!hq#XWoj^loRffT60drWE#?F0gl9}PhwVLcW zh-Vo}u3|7;jEWA=g=zE@pL5d4S)ZuZdlq<6-@H2faqAFk=@T=japfCL^^KAp&IRHZ}P@U+d z%E|{ouYWuOh^xutPGzA(1ppJpBogl%^)eAUxDJH40Muf4=mY_v7$1zmG0s1UjvxL9 zS1`OKiatJ*!!vLY3^|Ox!`GM-36$wSiEhe9W;O%Y6sU5RF1Pj^Ky_@Q335vKKi-Z^ zAm9MW0oUpWsAcv&`r|7(JO-3@{`d|9)ER#|;nzO+k9{ClU<^IB^l<=ufG^e%oD>xQ z1t9yUvUSfZo z{)~ghO~U&E=mIdEyZ&n%zhn&m@Cxih|8%}7GvwY06elc)uavDxg${{};MwE|&j6le zZ(u_H^q~H9E-&&10u;E^3*CT>LD2v92ah@GgnW>zP-+um=eTNvq!Jf;q!xp(-l!S6kUEx9*!+uzAQe$*U-*m7hTd(szUWtl!2qa%l)z~n zzK}^5Nb2|ii8wOpCd+x98h|`U5S`_s`-?fFqCVol$oD1^I80B*+i6MTJW#PV1WXcA zw`-%V-WE66l10dzYOb*c zXJqe=k(tm9q#{$2BI6?MYm9*E_un*af7d}SEB;Azjsy4cw}c}6S3%n9H{TSonWTMqZbSOYA#{LgoY0z)m|CCK$chmgGsDJML z+o*pW#k}tHA5O#nOc!88{x)5aV?xc`8`YA`=6OYylDdh?!>Y9PLhaeXOXS6Qnyn;& zYHW}{)a-I29cmvNA6W6yL%gA~dScS^^~kJI-F;n!)3i38Kg56h zCQtqkIc?3qZT~~bSx>b-eCX2ONBv{(-$(s^l>Ve@^4$lllc~R?n|~L0R{FyhkQDfD zM_3LJ{o9NFr=GcUL;mk${<-syHx>chnAo`i4gPi?tH4R^lN85n^|T!Zig_AK)q2u`3q@`E-3t@R6gfZbaz{KFWP|FdII%{<=X zn8_dln|qPT2;caN=27RO%@hy0>MGu8!Q0YVb~dqj^|H>Y@*@wz8Nw!gi z_3u@L^xk_i5>}y^xcr@X5!Mc&aQ9!F3~<>t@zM3#R((=dR~vU~qXpg_I4@m2UUx;b z<1-P_mOwR7xR6;$M_a>jpmDil-eNr4FZ9s@^z__%`iUx=rwr4F=Qd+1R&K|E8uwL6 z6}8;#@8Dxr0OvQak>+e$5Aa-*0auzw6R2c)waOvJp%{-s=&qV)0NpoFzC<7^k%rMY zs)FFzt<&-GN_c)W$xk9Lpgz;O3Iqc^;v2NjF8#NobpG8aJ!e=zGv)Asjg0+-^tAo{ zkq4)wRO-QqBATV6pDWGn>6WW;{4e%D%76M3zx)ae@;s}RRMc7EucRj!sPxrF5p&?; z|5K-GXVp4-gvnFw{0iIjW99$FQiV2)`EboO>|)?6awrr&j3r| zZ`)$_`4xb}^B*3L>sz|C&4hxV$3MjC`8zj>$25za52E+5>CYRFba1DmT_D?;xTn+ihgtjvYeUTn~d)2o{ZC#{s^0f;dB^;pe z@ED~fzLpU0hlTD)qoGES__k69J6oeNUu`Tg=QxKgJ+@<1^jMYkg5Ul#rd`!L{RCE= z!%`|or!3caqr&280_I1E^XdB1h?ttebt#v*Qg(MYktstf2iKx*k(4kG(6{zml1bm= zj&7XH=p8hc5NvvcBd!$MYgJE1&08Xni311f1TAD0M~5ne(vmr$&V@2wFqxHMGMGW7 zHcchXDofO^c5I7?yv(ocarwO-1a17KS^OQWLl<*RkoOgbgRq;RN2Qw#&yoTP>c7Iv zLp>6EB_0e#C6- 
z)MIC7CElmOJ<8Lz6MMLh@_{!xhPSBOrtL#9Y_)k6{i7V|+cm1vYZ8el6d83}ZyMyod7VD> zevIXd+IJw|V9xoFxeLByUaef*@nyd6O(JQv^fa$Fb4MP#rgM*I;dy~^Um0&->`}Y2 zo!FUl>yeO$Pr)ot(Ja_-PSweRVvFpJw;xlCc|^zc;9eCm)9bUnaA+@?Pe}K1wIf%NXRR+Tzxtrg3Vxv$?Z{w_GXrv@=yXxg$KpVAW0}>`D(12&rng1 z`HP~sPDdK=r`YPCg$U_R&#khag0-4E8@@M3UPipG`#N030qSNSyU^Gt#?KWksmvKi zlyJUgpDG{`acRV5poSE6>d`Z{T?xSV-G$=xBK2zb>^}Jm)h2)=YT_fGnSpbX^de{7 zy$-`iLAUBlV-|}n@nGS6@oim;i4UszPid8SLLah6H`s));Jo)6HnL!ixD-?q;*+C=NT99Qyu;Hmp&Jc;+f_w23JLd} zs$z^KmO0O}Qs;r3Vd6MbEY%V;1f=AcZ<8!Y+;*rpGmv_8@>ENMDAbWBr6sz9`PtVM z#<#C^BNgs{--Oc0GT?$b3==;T<7pPs$rWkC>qXPWsLPM{arWnAuFSmI< zFzNm`Eo16chjEd1z3K$`_ldb%VyM_%2RN9aG^)lY(n1q=;FyIM#YWaS7R+1g-GSOf zFz=4GpCk|1+htJ3Y}}T~;;H0Rkmh~2h54X<-P zF3@aaqXAkcQ3o3vqy4JL_0-^~^UoD1A^jJgOJw0VQ2LneE*aTF>HC6swX@t4p44i! z*{062Fy}q#;uqgq z!r=HJw~j?LDCotD8*d*J$JsM@&2aeE04b&2x|}M2R#>iKz6MX3GW%#HLf|m=EL$8X zL|A{{8EJn^7*1$^+CXjfb#6K=3L;b8*@ajSs79IcA5^zc;)!^AZf)k=y4KjHpx7LQ<^Y) zG=pNiX(VA_d!=TN#Pfp)?+?m{VZBfJshG$M;XsPh%vuO$xT$45diiCgmxrL{Gs8ZC z`rDsZ3?FRJ7DnLOS^Ydp=09p}x*Zv5bO8%W?Q4TWN{=rz6>b7EiTlmY6!-D~VR9G{ zDguTDFfhLxlzPChFaZX|@;rFVKEDPC+Y)~>Fn_m4Ect+4O8#y5e|Y$+cttvcI}M>7 zh))5?MpI#s$eA2%FIR01YB!As4@Q1h+PYFfo6+owcuZ>CiR`~&LMIC1txMW360b0UC;rZ(09cnzO^c^Ao1zj+fX$x&~zWIvu0ww%{A z2hcve8{l#Jji*Dz$(C8S%5Q7^xI#+IH>4+*<6iW1{2;>VaW$d>=WF?;PMAo3^g$xo z*X3o~XbVoK^b&8r(MNLZ1|3KpsQ=@dAH9Mb#kk{fS`s(b1hF+u}5{*LX*hj z`iQMOWnD*?m!k0!>(ZbQy1silLKSyH7;^R5dg0OrmenSgF&6Dmdfc(uhq?0JOuzA-?&jx$ADh4nGeZPXXO^sN={%(#YP( zYgaqh_(&qmqeMO>Jj6mM+m1TF$4%0WegWnX=LD??U3`f{zY#9AEydFns+=W?=QL-z zSUBblT3GR{MMN`r<>$m%KMCl6^OYo8@>N~>OhwBm{sGR5H^$M%QY`G=kEX_^culkr z;d@4AWWNf_WrX?S>US-1Gor)`-dV~9zHEF^vaDbO;|(V^Jq2a^{Q1bi*k}sYB%K-OK#5X4DU1R}kO;~-p9$=_6&3yN2q56e4%+)Xy zGSozL3kJ*%wNfmX z?gBN0-8r^cpdbr0FhV|=K0eC_$)Vhf_1c)}CPRe}*fg|9vx1(~mA}YsQ)$eLRqfe+ z?vs1j(0$DQ_#x9hl6^fmhjRmjcwJUpgVK70&2VFN+%|zO*4=Q#;Cg(HyP>|kL)3}@ zmjemiw;m35O9cB^BmaVaBMW9wZw#-1<-w~KnXF#}f2(M>J+aU6GIPtHmnQnE;AFo| z`#wo9HfAL+>9y&Y_j9adJ83hq`=xeQw=fIJhcuv3}yWD?Dd 
zHObS|2-H!(moA4KTWiSIz!GcbwoOB=1SXTja3jz#YN?K8orYNH;pa|sC^ zD|_4xRWG=n$*ru?J=Qt*k?7VfX+^3ARrCtJ6xrikmlCV}8DPO#g=RwTZo&MZ%s@pa z!h|ghGFllHap&pPUc&3c$!ixA`TF$y^n6;#W4IlwFZcRK%fMW`8BW>U(^bd36_)xq zw!OSIPZPe@Dnzc^GcAO2u}A@|wV!m%uQ6ljl)BfKL{{JDvLg}CbsK48LbkW?}U$yuDFQMO4eMB?TrlU{Cp<*j0q^x==I5xuw%EpyonQwRra>EK!%TSe$mg5FNQ zN?D?pig96CJ(J+2+E2RgqMcN1ZfxYR_D4UuS{xn9GreDtUa)*@bo>X2C~rl1rXVb> zYi#t`|7wGix?8ukZHIwIbScNw-->d88) zin53m*-7`&4y6o>equ-*imIawASU_@s=nXd?DCEwp(@&OF>S6Se)u-U&*?Nk` zhX_)WYhUK6Ip%Q%^Jg+<@##G*r#kO4V}(AuY9 z!7r!6H&}KICXBD-OCITM!dro7Br5huc9xmQ1O~yW`~A0HNAE~Q-(3=lkX9b%x~F`h z=sQpQt=_Uz0gcVv#~7t8jy=V z5lz!^>Zy93>`e|0Sze!!ADB^e;7Msa#wv03vVQM)7C!pp0ZsTfhIfj01Qg_&;|T6} z`7HHJv&YdzSrc~B*0HFzVeYse86I-&9^4BY(lWMmO4H62yc!=f<1U9IcC3(cgWj}*5A!ANl~J%Ytid;6XUPyng*Nwk|Nce6 zB{Uw4q;oa_GJ;n|%DOi%eL8Obs*!ECou-9MZO>DG7g-we8f|e)>88J0TB>@LPxcdQ zft~J7#AL+=ST87Y!UX-y`{PcQNd9&nD!sNYR(N|}XCK6M>6z|Q$$h})B5k}e#*7Z( zkcw|^k*C=va@A+1spQ$RIE?f7Xq_oXG7zM6`a<`bqI(~7QoYFSWWHjhU08Wi1eaBk ztlMLdy|hQ#3RyI}uvRZrQkuAMBuvL~|1dVAqP!Mru;BMX^b?izTo z#A5sEqeqxUz1b3E(uv%*tK8l&!fKw)Mg@6&?Q}T=7sZSje`nk(T>aI-N1*75qssfW z12YbSX%YTShE(ty>}aX$41eX|Z7-#_U%AOAL>9rW)fAr(N-Cd%SMT1U`KB+pARom3 zuy8MHT)YmhlcXdkT8ce%05L|w(xmG~Tw^3VS!*_&v=}Wd?%qxJTh`*fl_g?xqZ+`; zh^`V%iw{?TdjzB-v}(+>sw;G5&Xt?hOFhqZHU!mei|P#%ER;GMy6=-%{_uj%qeK@} zT6)>HHcYwpqB+O5k4iF4yV;Xm=3~>#{Gk3+Vn%uTIJAvwz;|PzoJKac7)*QII6+}C zH;LL5|Dc^EVU(EZBPvP~Iwrv2$ zgGI9Po>Ttta3hwRxy&AJc^##@XEwjzDj@%oYh*{)Qj-8%4b{_?wfxNeG4aS6AQBH9 z$plgMDo^mmtN)lXs=4;&dmA>!?5Rcg_*#p8uN>!>cGH_!?;^KU(7f3-yJIl}u#xfl zBy0&Tz0MUODz?DSX~?ZNalzS{u6V~-|J$@3yTG1OyWinsXd7T$n7t-<@6(uNUHRY` zZe)5rLgPxbEoWlid`V50?I`(#8i-Xo_oBcy&$PF&=;)Xp7$;eDr4PxnkXep2NDY2({z4Kf2Ztv;Hc5(h~>-Vg8_$8+`Zi;ry-s>?>6 zGg?m1HkC9ZY`A4w$-{6pzLOtR-ML7)%2gBJWJ%_G8S66pFpw1}PT344#O=X-IcUq0 z*wu4XauulL-xvV00C5TVh@q2yoG+>nUM)Ym)4pdRm_)LNZbjaL%-kDsnZ9f|vN9yz zPrr`2>EE0-xlYzCQ7R?bYGpH$`JIBT_80E%Ti8?omeIOp_CH8gVphhF%;mac;CSS~ znd=y_S;u#!s4{eeCqgwg6(Ht4TjqYMz~v|%^=9PW?S1}PSm~L;aL?X(R`NHcN{>W? 
zhoTjaCL@CvnG5_8^11F2NIG|TgS1vqIyNB`or}jAVrB&OtE%Gq4X1CslNfm^P^ljt zN?ZI9-^Xl$MbyjSqZVtp!`htIhauJ0RTjLc%t;J$8Cl^du@}xeF&B5~+V#?Uqq`z< z8aeOi#-$4?wmn?cVrChf)^2=^&fOIZwkg-9s8WDZ z1d5jNv~b?u2XXhH1$Um2UD~@TcN^cXhSy3~nA>*JEjFXTGl52Iz*n5VqhKp>Gm7*x zz1bxoG5M&E4AuN3lB+@+jE{zbBKJ>hXV+dqDnxY8q41t_-ri@sRFpg|z7{XUcHWUQ zRmpwOZyrygm*Wyaa|F9^TsH>0Lqwca6P!x{KRlOcUc0_JXeKAV~!5VJsl~ z_T{pbUA0yS!CUmI`*2HzwC|Td;j#`C`^;m>O~tzXH*vNRUFr@DM|odcS>a5A0_Ed6 z%ebNRib)EfFGmm!Lq>Q)x^2nJ;ds{utsK{dBu*4)~O>8mUUHg;>PYq2xho{6+5|mE3_!0>N^8Z@CL!@0tLG5pKyCrO?U?uNLTu^5;;WJQ%BoKcD&_;2 zZ-$oEFLW86XNdtJ5-D$$B)$v4L#)U|`*&V5%yvX^T-72w)+1d^5Y^bD@vo;RutRp0ljdto z^xDV@1AjFNVcYq_UvJ;7tvgFja}Etn)gN_=LCcaTZe=mJ6-LN8Jt-V z1d(4MH|x_X$5u0ZM6K8V!f$0H5sjbRr4jHvO{fh3>vq$?o=5z`><&9abXJIu zEt*rE!;QgHkY4zz2DhDIxkk;MNO!CEjWXGCo#4HigLSxW-Qk4NL6cxEbZ_|REPe=c zZUso)P-Eq{r0s!|Aw^#}?y;UC)dR15d11@z@t-2)ww}xw^zra_QXYRNo#YEcunMu5 zEZ3I+R^}+~A;YQW+tC=??SW3NfMMr1x(eLUq&|TJ$I;tWC40&h^C)nuD=&sxK-nE# z2AjF>;N{7)+?q!pc1>K}bvUKaj~m4Bjm+_SnwciS7<(L0R=`9MAIY3pFghOCY&8t%yM93 z@o$n;)V%`|+yJg*J{gywSZ+$#j$2v9>={V>SV&k$3KijR`F}PIOe&)(j zDC{ZJLQxrx?GN}au6jVoWGs&p`VN?+Rt!?iz<5X!FP5^knpeUj@x5MXwm)PjpHe|Tp% z7PSyU39hdLfFP}vRm0Kbl}glS1*Ms>*Ug_?h`l%Ijnzr}!0d$8+Ez?9=oCTa;F~n8 z?vxG>U0ELbIZ~8m$8?X?J$Z`nj9XisJ232qHdCaG(US|*PIK~KoW10qqelreIqrVD zn&^R@IAP6|Hk|r$)s^rVy#w!7Cn58D;de^usRQG`TfS;t=RHLNo_{=Rbm~*;Zs>U; zd_1FlVu+xQ6$koZ?=7Yll!obcpo%PfCxz)78`{>9x@Y(*f0^u-CEY!!irF6eGeS6{ zJczj3MTmoH*L+sGqP1*0OdDR-|pTJ(dFa9`L@xi91axihmfk3;I? 
z(;6)#6J*CCxc9&tV_YJWA3UKcVB6_tn1px)JO#Q3{1Bwz+C!Ci-IBWcRe0T+muE7; z8gIA&)1#(;viCUT=zOGD;l_P6%Gx3>nV%dpx_aC$BrHCyv#fw*7o~3H0KsY@=rJ#G zj1?MiC!wn_aOWkNCix@Q#_8ZKvjki+wOKs6WUF4N1|O7WY-)SDGz^Jr%g$A?5l31^ zSS4*UMo3O;nL2!@YUwMQ>=d`Lt3{!mM$XeY2W5p9k&KvMQ+5vO3llGtE5xAK$E_=S zRK3?3!{D;_{enefxGu2xD)HGBZdI8f_ladE?|GPdc_xSjrzf)+=qxdUF5WSYJ$XSL zNo!~ZiPMUP^A*R!87Hh@Ir}GHXq`Iu1hf?Fl- z_B&nbF?H1dQpjUFk(nz&&qQZvT1_BBHDn6WQe2>S>NC$BmzQ)E!OUA#a|S9kc{sIu zM)WAxdf-;SwNmO#6csT8YMW91^+A0Apa|V_i1=NYw9oSckcA_j|56QeLn#3O!ka9y z{lN$dAj<}bh#ii4nQod=XZ^r;H=uEh0M<*xZFEWoYk|kSS0)fn}oZqrD8(jCe zEa4<>9jQMUB4g+A8~`WFv1&1fS(XDJ1N+ayt@#OXqz62c6e^lT53;(^4Hu;Fb{tY} z^=A!eb(Z79JIu|p*tr^I1_^2AI1_Hd@_A#oDst@&*?Z6vGfMIc{A5LVH#C?WsFlx7t160!)%OPn<+`IWLWA(8cE?qNyYH!uCAT?z~lj+`cT&!hcDB4nev$B;USoGBvi7i(PEMY`q_? zuS!$Mn#6ySaW8mX$^KT1CyVoz@`Nt`kL(v$9zNtzc~vid?);kiMoTb*III`Lezz1|5vrb`()OM_&{>`) zKK)h9i|AEt^y%%jsQ&!)-D^f zN60|jvL?L3I%=x4N~c7oMUtHuJF-GJFu5@c*KHx8Z!3(Xc+kClTcPMDAEi&aqI%K)G#BO%dPm4yme3Mm>O_|cr zNb-A!Hl4fqoxw&~UOZSFjY7|P%{RJj`X=m+iu{b(WCiJmjm# z&I**sglGRS7em8A0EAc~bEO|jEk8Z|si2m-?TE}be zE=XwmRDW8}oJ#8IR5}F@Xr%8Vh&VKpGhuhbvpzR{{ryh7Q|hMcyA4Jre~Y-81&L^8 z?dsgri+MEflsx*fu|7a>fDJNrw`p{htfx6MYRWJX>%!dYB+Gbnqv$41o$|p$_o?gM zT!%s$I(Wss%Rmm8u2cfB`k3$bA_diHjLE>`HdbEbb@X$4KT~Islu}kKvU>D7GjM;B z1_N2%GqPnmb&X)$Fj1blDO6UuTgK{9is!Aoh`aLUzTyfu8CztRg|+T$4=#QX_tKf! 
zj>HStem;(f?%6!t{h)_I`nh;e>aHQ}B5P98|#6upu7_xT(a}>_9PL#k{dSLdtKRU z>E9!Gjh{bu3LFx7>`IXDMXGB#4|=n^lQzG9-2bKa@jOaOV`!HZfh!+-*Gg8?-u5SXXB328?Ru8cme5}p7zGQcxbxM^Rf_|kNy@nK3cF26o1k!7Q4{)Bo{YJ<;^mI zch$O#!E1qnp2lg*{1OhnIJm5HB|Leeuu$NN`GbbTf}~O;rbKy^@CKIBd@YtY$un0y zwUtUbr!!xus0%A}07-l?LE815?0HXCH`iSq*UT?}kl4yIyZ5V&N2&}s*F4tiRA17NT(X&$`roSF6hG9H^# zuIFk5OOs@AsdX(c-=KS#@+3SZG%)8U$>rCV5YQ|;^o_D}siwH`5L8+8K*k_v;x>W< z!M-kPc_(^ho=yguO}nscUZJAvwV|U>mTT68zAnevnU5z`#^O*mi)@rHDn@&!&!@sN zXViptQp4ycgCzMKp!Av)Hkc`d$^8z6EOsl5DfCnwsD_O74s;cKephB{eIBvdI|@YZEBF!;9)oT zj)lF@-1Vp7Cz1`H?r8jk1z*3S@UdZ`-lH79z6ie|d(1&2%S@jA#wKOGdaA%9J06r4 zq~p7)?rs5wL}gz6vRlpT5Y4;@ZhDW4&q5dv6BOsm-=q_iusKkXU`cwO;OKR#o`kdr z%2t&}owT=!v_=9tD>&Ctvt=v3f=;#BCB;!EB)H%}i%78^lSC4EFeCc>O}SY6+AHCQXZOS(MbMMkGLCI9MGX#iIdl+m1KXqRi@mem%{smWs?+@$7rpV>M>k@cDuaJn@Gayi%_mC|v7 zNzO)`S*R=snHfLnC5FLmAKL01q3ZkL7LWkFWmszX_oprGT(rJzWOYDLpSz8hDTv&3 zeq^GiU0+SqA76@@sTS`q$d_YANY#vO5!7AgO$159zh}6LfggrjKW`)cefc9rW56d zcNlg<>Z4Mp;JDb zzeJ|67k~i){g89kk!_ED5Y&Q<(uuia;_kBx<16xlmX2P~714>s7MAiTzMy>3(q7Z` zg49n3TBT>#Qt3L`Y9Bg>dMOVzjhZUI)^-b-2F-=<9kR=ojdDgnO^`2WH!kcfMuWFG zg#$bUzw6enGe zRoAqOJ?$t?WoYL981=3AtL}G>8}ThOclYF*3qH%y4x*O5r1UCFD(Xh_^KVD-MXLKQ ze?X>8^$n%>;2M`E`Q=_%nllrPX0IKF1xf?@+A#f>m1EAOjMCG+TM?Lc&+CZ~huFk+ zrj&50%)#+5!^Bnfbf@kQl@`00$ZUHeP)H5#SuY@&BBKD>+Q!e!1{NLBBY3d)v1xJ? 
zfny77EEr^|0+oJ+cDx%|lmWNlv#)KjO)9Jcw;P_2FkCAD#;vd7+83xtdadgW8?a>N z%B!o&KEk3>9H@#E1h@z7{T>wSx}3asDG2a>h@WjcDk@eJj+TQ^|$eADrsD-Gnd#T zWw*&wiK&jOCGD&Du}2q7wfb4Ust8EyPZ)(}rn|ovrP>^mlNzzquz3P|{8GI%#whx1 zYa@g)8FquxjRi#ED#{nb?MT63Ts|V9T_HS7j6*KP^cW909UxQ9fcTSZTz}rIxhxQR z|EH1Hr$wzGHBTXF93R0gHrqHTtn30MY$r_Yml z$~^4Ni+nnxUx|r_*M58zr4yczgJ_#34A{d z*QKc8IK9M~`?q$lZJ(KHAE0~Gt-k*__%YrUP2{Z|B+{87E%EQqnlU}6(?`+qRw;J- z8~Snisg~B|fW?(4?4#<(3L6hAKHA&n3QZi`cn}KTMM*=o7BhZ&NVy>RJ#Gg_arBU3 zr%dX=x>DO?ehsC3a;v_-Y;N^Rf15G=w{Q9pfNp=aH-P^Bv@O9MQCka~Ld`a{l3?CD zL388HzX(!Uc+l34z4NyDI$X51_k6i!JR&FMf`ja^Y;m&cAeH?xGx<=uqrfRb6l=Su zTM(QmSOq`Ci9&A096K5sRehe^sVIhMlxi$9K2Lk{0eHaTrEnp2d*eQGd{DXj1Nox% z@KUe)aO5DoEKKxFQU($|Dv~^03b%GuGWsp-j+3#%y!GN zjealUFUmz6M33`H+N~+}#CfN_v4G1b$vT3nysaDu3%Z^dpOiZ86ADgvP6eax+NA;t zgY?4x8O}b(m&Riv=sqsqw zD&mSnuU3oX7>$h!s+C)R6YAsu1N^i_#v`Eama3-a*JKwGUvEuOdAlJ*@ac<>xtiz8cbw945TY%M!Fi=|)c1zFk11Zp0GeYXHoU@fz7f1?hv#%OH|@M>@LJea z?YhKOm2rvQr=Gp`6R_YR$AB0v`VD`^ zObzr{uwXO!=mFsZ{!w|SFhUq&9!(=2j`|LLGNhRkAzQD=7q|S1ddQyZ7Ry*6UI7cN zxo4k*86PcTr;eqJ9F1jTDB%un{D~l4D_8O-OW#~CrILLXpt!t9LxOIhoDEdn$CnsZ zNRJFEc)k-6Hc)3s>0Daj9Y_gs=o?Z@Ww;HOd8G@-a4s6u;2X0I?O003;&gwoJR8&2 zaB-oop5FQP#g`M<)akxZ_)&7%w=rv7auQD&*FV2Msn0aX@ookoh$T-4>Q2^n($Cyp zq&^&qO|-R|UU}B~>%5JFU+`E{fS*WDkgtq?Srem1?U84!=-}c!)HN<9 zQM>6x=dsnM6u|SOo31eA{UuV)+DM!pdXIY72k~^~c762-0%dPm7QF#k?X4^?zy0i( zNk89W#w@+Z$8|@ct6;b)_nQaIW>CIPbf|kt#Cj6$NtAj|gs*sv9yf`8iD@={nI>H9 z{`2lAS9yfGd%?Z4U)?v}X7%z-N8xtDoWTBnlL&U1W6G2h$BLf&0(~%Sa^!hV0%&TGFs#PG2~kMe z-?j%zzikgR>#qI1>%qX~m>m_*?2WZQfQQ~9T z+1c;wEu-ioRK@Unudt%DiL{Z!^M=)<)xbY3|)+c|J9tyeTIE@kK3N1YH0O`pDPL%P& zI#`Y%E5t>aHs4=EIYFuOQ{Ak`td2xWeuh5Ew@4f_cs8#r`ztl1X#ad5*>(e2(%*cp z{v@b5z5urUW?0m@p;PX}z-+3~L!!G=pyqL7;-yH>h?~(DJsU|;J?5?L(+c6M7U{p2*b;>BcxCHlTvCtFo`Q^v1>yMP3{9*G4aBqWDH zU%hemD&o-`O!vry(KF@hO#2NwaqF90_0>`lDl9qtSBoPH8D(BACf;8M7)rWc9@U%p z9p3yZQ;aw3E%sRKezP+6*bng_-LBdfmE^}Au(-eS3Yr!u6|Ce^M&jOg-ulG2IJ=`= z&M2uQKR$i^)R_h8e-wmA+-FT^DId~KHc>8urQNU4({4)+xr+I%7D0oJq|nynx*aa^m>y!Mg6eR4Z_JC?YJKD 
zZlIfdZNiP#ML@OqvWjA9K1F}Fb)@)@1k^cFU;k*tVc4PxJog?0!F@H1 z3%kAa)a@MyWB0mZLwIz(Qr+Jc!JVc-ao^W=fP8YNR|k7e|LH7YcBY?WSdI$vUJrE( zmw0=0T(P1qUfbsxeM>a0abM3)V`_lCp=>*ZkuM=t?-9j^pZbGxS0K{%C`k4u`<>C+ zXK_}(eerL|Q+aBHQK(-^QO^n+Z|oNtj4JkWhkX38!;_0~0#!$!~_XphEs%KAa zzJL)|%ZE9Kcn4}N^4F{D!(UgM;l9Cbcv?q;HT$Wg_=R&PJC~w4rS?C6ppW7CUY;M1 zwNM!R^2KUc0;b72OEYaUT{ala&x?jB5xQGz4md5X5*n;&^Q=_a4WHaMt*QY%no%V0A(ZJnDO#n+q&E(;9%v zBXJ^i98!LW9Lpv`yxM!>+_ugn5;L=Z8Mue(4jd{EiH9)q7FYW8*OnB1oVQHs`ADcZ zsf^|jCR#UN2!7x0SI=&QcWaH2yW{tY@hc?yyymmh(?KL)z2!D|Dp6}K$lvW%K0}cf>w`>AxS!n`l73R;4_7*)gZcU$*54CWE zeXBpWmM+a*`0>Lh$!?+cGB!}2(iHF0!cx6CS5p9(wx3o2q;D}wU3#m%m6m>;X`b834*Y=u_Kzz>|bq&osu5ut<8 zv{@k)QoEN2WU{7TB^1Dc@p3zxb#poQW+b3_CfJ$s2~1VBc*Aspz`zkesS?^sdY*WS zYcVw~OB?vkdr!LiYV(8Cdtxw${;3>qY5bi$_in>6gwo}p9r2pYUTF=MR*P~F3|2bn zzvpn{YmGM!WYyyLVz6+3L3=+pK<*5gxPv zCjs2T%BwR1pfCr^@^0SPlwOD~j;jNzHJ}^3PN4Y^bKGR^haP6FH?8`#_6tUj!Ug?eF9AH=d7zDnxA11o@Mahpl(_ zQRsPeCp&^SQL(`XqQ3FTL1XS_i$@(aBJX8}KNkXj>_QXM? zw1Lfg<_{7X@cvo1Bg_z2v54%!(y$Txys7u1YW)Q3bcQy7bjYbP%tkkY?oLhU$}3g5 zu_9#?-KeW~_XDUtFfdejVilV$p4JYCg8_dJY8_85FWmIgvsVWwSf-(6g|>&o?O@I1 zR9CZki?8qQZ^Z+iGSY9=$DLNdyAzuxW1_OUPC59|!CGzi7;?|Y(yX4{w!+^f(o0{~ zs9jVD{&bzu)y>{`Tt5dPxaXJda4o<79;4CFx+F%?6#PrzCZ`y)1O<#sthiZX!r+57 zosg-{Jh-9~*g^A9@J*;U|K@BqP)r8QpaD>TjI(TL|GYT+jgdo?;yenEJ=Al(zaHaS zKwJH3e{A|DIEMV$8wW~BJy=U|v>_=WT^K_U_td)LF50-B9aD}i9eZHhv1jkO_=Z_P zt9So_F5W0~q{RNwEzuKL; z{elU<{?h3u8Tv>z0B*>AAWB8w&0TcIeL-jTT_v4fg-`vZwM5cAjU9lC(dNbas@#t< zL~LKQEYHlUePV3L<7OunZ9>Zvx3;t}A(c-w6+m!(cP$UlIyoigA$I5gAeq?jepWPm z#G0UWrqwFVP?jgHLD&;(HivP#Tn$&^R5g!Xa;;PpvC^T9x8s$Dan0-QB&Fb4pI41q zBqAy7q*#LYd8$H0Ojl<18DA-K-E4rOnuQ1=G<;mY4nT2wddq8%>LE^bxDyUhcX^ys zJ+?f4UY!D=VB;z)A^goZM!C!nTAbR_ol$IF*YiAr@OfUU6YRRLnn3t_Ivg|go#k$m~XaWAtc zZ>&PX{ji#YM{ls8o0A?{S=e6gNBPAK4t(>=&Os5g>8_ih8_3Kzc+v zf#f~g4eci!fBZA18h=ve|9t&VM4j)+z$?yqp=z#N`4)54|q|;i0U5=u@`;$3OKy_x0xp z{bPOqpS8boTjl2|htTr?dTgHT%sThn_YaaQnYfsVKS;_YIPhm@Zjmb_qk9#u zGx&aqqv_w8s8YZg+fji>`{=;STX;h)SIS6OnK{aZ~e 
z?!mv+#L@*cv6eWLJ&=Ddf+ddp8<8y2WF?Y+=Z9YXhqd%S_tg0x*9HGjOaJb%TZzI} zG-BjXHYlJjBs`P2%T2!evc6h%deJ@hvs2%CLTGnH70C~MAS3oq=g!4D?7*?NUEf9#qbj^B8B&t?mc+| z;Aiy9J9BVETgg3hS& z(B%1dkJG;$=l?Qe_Rol2{<*&XeErY1&MFh!765H-nOruS7g7;3d9+idu5n(=J00mZ z&#a-gmbz1el_X!iU$z3=nxCK4qQ$fGzBVFlom~92h9cmj)@V7AUjHt|4VBc3@hmWeQW93 zrqF++wg0)dza54D)cpp?_#e2*A9EphG)ufi6|p6YG=3aU$6+r1_Q_K?w}78^eqe~| zhWw!N3oIK&GtJWr90~Olc*;C8{N$Nv8;~tZm_GwiMDMfly$H{z)uOygX})2}Mip42 z!cBKEseJwi`Bi5@T~xr5ov`@g;E^e=KDw^ItMcGc@7)D#B|_vfrnOHPq>0I$207N7 z9@prZW_wR)n?9PKWko#1CI&&~XRobSqe7F+KRrwdQy6{nLVkOkJF=56!V%O9`}?_! zFdnK|evdTG+z|iq<7HgQri}{{EO_2Ma2O}pi=Kb$u1<7^a^N{fktd8{crZKNA0&w^ z?Zhi(W<;fI@yEOXt?}>PO(in6e@3tR6#?aKVSPYU*OqiBt3U!cTNTZVp3LVV=YjwP z-jb30-!}f?u|VJf{2yq`wm|ZK-`4-!*I)bo|3ZH)l#2SxG4C`ENqNP}L6~Kqp@qF#nR?_-9ib|2@*)jFEg75?NzbG_v1451m%C;Au|1Qm1 zfB5TvhgSZgEByBt{ncRAoR!Ow+f*BU^~r{=tYQ~RqU$UmbX|KEcZ_?PL^2G^7mg8u9D z@sa2^`<-b95O;r0An?S`%PYad50q^$oB-0J5T_?J)5?(8RB;l*V4OAJyC5%a9Sblp zX?|}->Tx9A8xNiXJJi2kBi;k-Ti(AD-o^u@$J&2)O@?*w0O^651aa7mQ3C#%NMATA zPOUT6tX)@mPm!%h@L^OMyRi)v#JoSGAJ%)%GfH#`&83k1>RkK+5|}1@JFUOg2Ho{lD_pCE&&dawKG;K? zmeqwtD4mss*VP0YOsR-*}IWd!Y4 zLs~P$gd_sBYt{Si#eRR7W_I2NE@OXE4ZDYC)Vn7hF*W_I)cF;O%<|0KPi71`MH@xI zgHbd@)PE2!*eq4gS~&!#ZmBj`&o-@V&BFwuhLk)VsMA~@2)T;rDbe2tF_5Yj{kY>@ zp^5ieg!ENMo3f5Mi`QZo3;-dY>`RBAvZqN5bdm~wrqKme?uM-po zGY6d2cdu>eW7z!S#ns`N`2H86G7#-~Ro#qf>m5Y;YuY;oU#~slj;me?G8;{oD#!te zL_?F?l|cMXZUmM!zvH+j3*WYT&fDQX7`$W_^LEJrJWksfHE|Gbgfy+B#f$UO)wK6C>J*L5#_0#mx5!YOsdZyCEyK@iA>#M^_ z1tSn6Pujn6>sw?Ygz&zLT9ocw&EwtKw$h)ah0~<-i)EF(Zs8m`!zAybP$Fb*`X2qe zt2|78a;~x^eF%O$&7$ZRsYH$uD-(TiVZ?fGYkI$bL2{D1!@Ff`Rr4Q)(~})FmeI^N z9-6l>lX`%z=GYz4%Qpy8QRHP9Oe$-s``H0 zZ%h5=*V0SIMOQT2rgGd!gF~TC68-BKjqT8aTswW1veqq@pcX8qi;LFKfALB;#VdWz zr&8rO`1ie!t@bOCWb!V|z#<#E`5(AkfskNmHD=V>? 
zE%DPf&C+f7^x-~0eW#rG`p$d=*8}D|uT{x>+Plt!yf`SA3tP?SHFa;W4eM{(R1{b& zxC1GL+z;J$dcK!+gvC!xi!_XI8#R+~4YU&R8nBmgV&%_29QL2{gx0awQ=LcUkw>N* zx%;%s)5(oK@-;6UlrOLC5WY=WhWJI+>b5;^G3no~UE_P!^oT2p(cruEy$-8KWS+Ff zu}4>(_guI%?d*1S%K=Hzn#8ax=9_xmwtaPc8vE%sbrDAOdKymw8 z%m}j~1oMJF;gLh8=?4F`B;NEa^*3;d#fhhu2B_mk_^k-3#2M==H+2JkVFgDI%vccG zc&T|fGjx`|6p*_}&DxCPjX|`Ai`rirh$dc{nHI_P6L;~y5p*1$bJXYbJ>E+e(2LyM z-*m~l9S1wCSQt7-qLRuH0{HGsg!XWr{hNWZ5cZY*0aJ#$VfgFvSLQWOO*P2gsj0qJ zZj{_16ci-k|grPB?93M*j*07rIQRsP73TU7n=4dW(XSy3vdrOs*bK7BGN_v+I5 zwcVyf*}d8d4i1#~#9M?$hB{Ru>ySOT^V_=Kf`UX{hdpP!4#-jn)$$Cyd$M4Mf$%1& zn-FjSWUVeZA~I;$G_l$YJKS03X?zqGG28) zq(H<{hQ)J5V|TY7%nREX4FgyOP4w`ve(|t9j2kRdgFXyVQ}+cM68lNh=s2I|&scnL z`|-{#WPT7u$fx5c8%>1|MB*bFF)rH{!FeB zbeq>U-^ONUHfaR~I~u|SIu^TbBR5ykQJ7z+aMSY~S;|y9InLqR`vpAMolJO!S{rU= z?B4KXY@8u)we6b#+R>EdPMe#ze(0NFlNO_W_H6XVoW0nu3~4Y@8ZJf55c{=1MsH{6 zY774;&Z$1(E-oUwn0ZrTPby5}1ch*0)UZb~mRFwgmdRzxvY0eOqKk~y->ZQiLSVIL zpA-3a=054L1;Ji(zKkj`$KP3`M1DDC3KNkC$Q-|eO$dIU*RC}RcR`8?W*6{UhElGf zS{#4+FUN2`QG7u9l|k5iSp@Jr6GqF#>-wyyQF?j-GNyva2Dz19kM`jD+s1gwv##nv zn|Mu6{(OC%13H4X>P`5X-f?dgkXO#*&k4fT=$M&1e;gS^-$#lkk6x<&j z%7VnQdg+2zmM4hl@uPt*It>%MQo{Jrx$z;#XZf{;wj`7ylVG>6$R%^L z;Y>EdSDAO-e3I?Gcfg+LJp6Mh$R#%c`D%zf4Aq}wd)>q&-opjPzQ%1OJZlhlHy>uj z`or2dgJB#pk-*2WGqKA`*eN{%%}h?nHNmD&C$Y$QJ+y0Bnk>(m0$!?>@Q)jS1H;z>VVGebjKqSslmqT`gL2 z=Qa~0gyU0B90Ny`av)ZIel@bB!D7LVVg6_&G6S3RsL`z=SSbj>RgNmFQ<3j3po&HS z=Iwn~j8Z>`D!%pcDL1q`u|p1EOg!=?$HRAj;t}kF$FT8d@>uvhDk9jwN(uoUKBg7I zO*Lw344d;xm$&B!9U~q^kO)9+*&Z4lQ9(bOR+_{v_G~+-VJ_m&vph5HN^(Ogw>2J* z_?#D&J}7c{u@+Z$_DSx;=~34!oX4KnQFDm&lLcd0t$;_QP9G8WwAt~RDDPAy&YA|> zj@c9s`)_R&CPB$cce%-%w7!1Wz;W)?BUswOLKidR=~}~wk$mckYRbir+^&IQE#uS( zN5vSbz-qEdd|k{dAi5cUyuhs8i3>eb_vk*as83T#Ma`n===`HYnI4n#Olh*rDXi*6 z>y*~$bDTbgHoIzAjA-bdAbaA2&mXE2Ow0yh4ljI0ySW}{Ll)7KEOMCO*M~w3z?1yP zg(3#MS4TvaWg+cjr$#EoGk0ppZ%^(kG(pS7$86Ra^!g@eQ}Awgu{;&-$dF01x}UjXBiJmcT?Z|TA!*C4-^E(LW=;iKd1Rr>f_Z8 zxyj>|YZf)bqEkQd$ir=)C0JL45vOnm5eBVIaeYp_*KRK&5Rg~kP=aiO_XC}P?wYfc 
z?jg<%nb71b>Tyy;c^q{yy%&ej8gHqa>T-KAR%*t4>NT`**hqy@DK}Pxc8X+`l{q-tAE-D+ zsDX(N^#K)N+gdks4ALp>%QTQ4HW?;%vEk7A5Dt`t_@uqjLsvn60RzMwoAMhy=hjqo z1egxnWw?iqG#f4!JVMjWI+U5vvL{1fccp9vii+tt6Expkow#Tzh_N|1kxYo79V%aL znnB)}5kszyB+!8!_<SHC(UF$;#!%NI6_l5AQYAX+lvTM!{A7$>E@>_5# z^RU&duE0s`n3?y2!uO!6dosPu!d2xJ)i(r(r1D@ZsIcXs$yJA5`kh{?DCL=kx}6pa zgMBQ){BA^p1SY!uf*^{YHX{)R^rGwmX;s1{F3@WDmJe^nG$KfY z&@i}0y!Rt_)8r7twYTS+ff4+7I&anSvPE4s;ld8jMH0^!no<^t5qocoU$BU1FnhHf zuRnsZPqh!c$4kW0;4__c)nVO zlJsUjbj-bQ=&Df@$Vqgyp9+6G)WC2ig>`v4a&!f=4G3?VI*pYIdcVIyF6g`PA*eeW(j=b7tnl(>!J~ z40BWC1$`P>s5sMa)Zm3NcL+}IYblW{6pnp<2pfv9wA?-tBF8-g3YjnD;qH6D#oBcf zW7RnmySdtT-JE;PDp1v-%#amZ^JI^wJ<9Y~`K!vol!IGGNTLl1Kd!xK|HJfPYA{=k z{~~1UO2=^=$P5+4uTr?<2s0b+arPElMb?)L?CnG>UY%)_1*eZs z4-_`h;T(tjxMoZZ4A%_%-+@v)wjS<^_Iv?NfVhM$2Smv`%d77`x^|YqX6>*8Mv0Th_Xh%9Y{552fodJm3%-oG z--?fT;m?MS_-zZ9?snBd!@X1Gp1q)r>wTJmYp9tqz2A*#Tj;`Z84BSzVpT+Q4^uVQ ztBV91Exf_+U}CR5t`ZZ8HGcEw)V@-0^ng>Ry)19PI!|%J;1H)XjMH2FruoJaU7Qv* zuD8H+L z`O20kg@(JULVx0^9X!Io;KvbI&-sCn#TNRpkC(5H#rHbCj_Q<3F(BtcC9<)6QTZ1{rDN4jCJ#Q(XTR^uz1I2m!N1d6&)PNg3u+fm ztTEpeASl*65p@mBhBm|&C?`Fngc|hZC{YlG@mM|&V2e$P%mE0y_r?0OazM=PRTaqa zqjd+LbZE+OPM=oyx@$Ap0eYpfD*EHDABQ5$Vs7(jSdfiburX-(OT*Q%mEr#C#Kn`r zyJfRYx}A@r9(yvm$Z93*8r&YM1@ET`SyE>ci}P}5S2cyp5X8+6~j zw$w`=efPo5022wJmtbaWPaERfp-jMx27_)%zu_a{TrqK;FU0@?iDDNJ#r}m;9C7o^T&Sml{bLsp zfm|!O_Q-stAv>g4ldh7mM}*~JgWx%w>lBJI$tlNy&hr|i_H4Rnz}GQjfi9ArHpxez5Syc^1*s)mkUh6=I2T%k>U;{*}UE zyNIncGQ+M1_UzVn9+hYO6_jlcU{_X>@2xwJKgY4U+z-ozyE4^=euC z#Cv>M^MgW#4d4MKe|xAxBj8jyWJl07o!$g1HJ=y{|E+HWr(5LO&zgDawu$h@K=Wmj z2X`oXgqKrkM@H<0qv|HkPj~lGg{hU_2Nh8~Fu|r>pt{}hM)SKPp-?|jEQcJ{fu?;^ zU2gVFy*apdxGOE@yjq(JkrbKUw0P2CPH;IGxP6p?NCOL1aqs^0G zp1d5_3i!oJHKDEi&6B+xU>W`VXOO|oEA?}PMD)STchxsL=1C=9~hAd`|93g|r? 
zjJJN`={8?kUMZ#$H7W9)*+Pb=@S9kfBD80w5KUB)G0d4tN7i4!@L@~XCpp5)> zv{y)!(86s<*YN^u`f(5@IIw|Xo`u%_iQ08U9fQNjt1%^S2SWYKy4ts&89!@{-GAu( zA}hqW&3*pI5lHa*e`GuVG6b2V9WP(CeF0ue9DOuPJY&sgMh;ROS^Z zzxpIt&VJW+c5-*$Xk4SXPN+FAOVZxuiH5QpvTmR{xw-!1(M<3*!5joPFC7nKTYX7VdPV?q98x^#7%3Zvd9d$ZfAB&xDI#NmEKJN8syAj8ZrGG|Jn-0mrqCi# zw4S(RR^5%V=u=i@5P|ri`?VD3mP`4D5*~xbKEu&Jn$Be&@w)U&f{ZKo?V9&-8r1^L zw!{!=mIXMYJ(#|+dI%@NRr!3=#T;#SCa1`aeYe9A`Gg}`!aCub*?UWW{$Q4>j@Qg= zW0l4lEncIUUJvjy9a4Gxu-g~o&YV|&Sm*>@RTe03CJnkaC674y%+P4APOzZoFD-ku zz+r(@cZ0uPjUlBnd1Y9>7FBsBj@?cD7&Zr0wOBTAVIE$gEk*Q@`On@4*a@E(_+>=o zMhayY^qxtN9it>g?}{}J8@GfCgwr0#RT8m_Bw7e~FH=~Fk1PyVK(kJ!OlvP1tA-pU zr<&)pnl91`mQUgHGmb*!YxM9CR#f7c%;QOhH$lE>&mHa(UcZ1!I^xZ3ThpzntW>+0 zkooTUli_#Il@K8|hBjcxc426T$j3^}_=cD%8q0&2?m9a?=gGn*!?<3mI~sM?`F6%N z+wM(^wtXJ#oChYh!f6kDkwH8s;ayBA;uteDo{c3O6qq^{)aMYd=o2i)Zze@ z&lqwKvL1P&_DLg8rKuk3D*oe;UH9laqOg&9_N;v}4zbz3_a+rE)3GtAS!kU&rMRm+ z$z?=midEt;!3RylPd?m`_L0V9?h{8kj#h2HbXnPmlUX0gFYJf~hvXAJGghyDmo=fh zG-1$o3F&@sl#sdQ>D1cH19Ic_N!TgvZV_9p_N2;a*HX(kA7H(vMd%PoF>%NcFp!e< zmX+x$uKo8sAh<`(V{S$GC;LH zd80WKk)j^0J1BUiukv;A3!T`J`PjjimTuB_OG(Hs>+v?#M4FGCv`MC5588z5jo5QX zp2zQ?r+Bnzawgf0-li8I3!dj)c_6b9mXl*oZCITS%U>@q>d0Dxh~5>NK`d#0ZTlXO zT>D=O_)*mW=}!RS%iHF2KJ8{{SSH@zfJMOtEJFw$@KH8Z3V<8|;f&M!#aN@9P-(z2^csZG zgntYqnqH|0YLUGs*Rtq<9ol@IW zr{-{V^ytnba)U}C=0Y+=6JI>o7g-PjExrHodj3{r_0A`h2dZ1*l;$!jepsU;>ooo8 zS~_jhEkxSJN>AG?d)X-U7@eRacQsscj5Y&5HLLc)v*=~Emo}FpVq=JClbgH9CRTPe zY~7+65st^L2f~t#=b4!93X&&5$J!AA0h`I#`eV=fhH`AEMbU=dej81gsP~LL1TI)b z0DpdbJXTsf56*jmBIk6bP;X5#op&|PkHYNKFGN%0NcLF#rN#0MHWFDT>HEgY3MLF_ zpD&h4Mz%-VQPO3pg_hK^@=lOerr)<%e&@clKCaP+-a+BO?S0PZl$;+(Qw&+d)FN5y z8%!N`0%w}V{qjJp&QxqkZe~>z63PjxO2@fPE+zJ?yUT*`l;_s zi<*|T_@6*FsO@Xl%({FmY?n*%#ej;d6bxOHk-@JjH7M=$0HtWcLf)eCHyi)2zH(zs zclR>ttL?HQ*{(1tbaRFggpzw)N&$>c+Qen457#Yg8ut9M=4b3zbulZk1&WMbR`cGW zm77RO2v_p#qR4G4I^_v8;r(277e%Oh5*j{=tM-a4kkj&|gfF4jntaKsaPoUJEoryB z5jgo`Lj#DL$CU`%p-U?(^Zv?oIz1%u-K=(-c{%t=I2?qMr`dTEHKQe4OiCaxT$Qhhe^5D*6z2HcYAw?*d>1|OE7)@BE_Zb 
zfdyYJub$}bN9mWE_@_-KCrFh#k)@oQx2U&|?O=r~ce@+%kI^ z>ty9rY`Wpq6$o7;dJAY$bcv{(o$^vLAtqqvObfm8ZI6=Hz$VAP- z*T%pqxnJ#FbA|3hABsyB3|{?VR-}CVn~%xeIH(VOk=rw}`!MO9#FT_%qvj&unSCGr z<^0ATSqD7Ggm{mt(uPlx(wmscP z^qeWlT*BH7Zn*S17JWp8T}*OUM9mIa4;B{}XIdpch)s$pHJ2+!EXi7T)mDvICrTAW z`VqqkJICD(A03}uiR+Z*8*5z=&eZR_G8`Ui$SU}#sC3sulc+ynb!ES~e;S=6lzLcI z&1XLZaYL!B5{`mW>%1WQK4dNL(m40zf&ayRH1z&@p2Jh5lW(e@-UP$O6 zI_n5!yyXilrkUAfJA^f21Lt6ZirY_fdI-fk;+trCgJeHZS zkqKW`>AtYO7?)+dATfAopkw=IA(AWWt$Ze7v|@+?cuoc;(9fama^D> z$xAQ;dSbnqwo*!nz;=_WGmk3XU`?-leLw`172*GVV7XPR=Xv1)l~mNaj)i%f`?N4l zVW`=94J@#hcCT)jC%7%xfZo+zoI<~gzkQtlvYG%H=L?0x(k}Whmp%#YQ9MrTUx(%c z`qq|&pLiwfVUtsRB{iq@n%L+$DUgKPsNhk(UfKA=Gm^5JcY-Z)OYIgj4bOWlEiIBW zfQ9RDSyCTk54$X^w`tV~&q$nZwSu|NVeq+KL=3^}3jrIZ&E@FH)&YIMBdrZi+D$dH z8!D}rIpD;V(4+5Jz!}#$Xl+~Mx|!mOZd6&6KlW{3g2YVhn@*U^2%@`~Y3oNoZsC^H zN$hQIiCFg9y374ZCy*S&PnE$QG5Py^Z`|x8V52wbB-$7kGubQHEisM8et3PCHx)yOH9+ldfZhH)2zrbzWw4bH_z#v(kb{sP1HG@F8K)U_r26KZ86$ATz-K?!9%RsQNwG`PCq?O<58|c)PBod!8TXozK!6 z5elkID9e}4yq6t0II=y9(PDt*(wW?b-f=o5oO#mqi~y?t5Y#ov73kV+9<_hGV8^b) zsg^ocYBlRDBW>hTFpo;eB@;A05Xg#TG63QzdR-Fhr4oaJBm5741Y(Hc*bMES6Xgf&* zEU~lSjHO$Tg7l1orZmc{!ZyYxXQwo&;{f}>EkXHA7MqZUbb*L{k!CcW9!F&DxVo<) z97rP>-O!P-;sLb_rVHedSJf#|9?lG_mu0I9zCG5U{>&=Nq+7uKM1rEwsiAtyBH^LW zv)ET!_gMY;Mo}H6!8DVgu8=M1hjt#%Y#F-+$W{`_%lYRuso5TdFyPIV4j5ts3Byc} zExx%mN~w=MpZ~^_0;Se{VSsx7wJ5g@G}xeK+ca=%3wl$o?S))yYJ%40gFWh`PmQw$ z=M`L41Hni2A)QrBAL^aoI8@v^)QhupUrH3;_#$g+w7`FHmBSA|QpytePH z>sGSRA>=#6PvX}+FtdtTO+D`~HOb$ic-Drew`4Yb%z9YTUov?E=5@Acon_^l1DM$C zr4m1`O07I{gThpIROTV-9jm8b()74KMG)Qnh$|%^YNGA%EW`<7OXkl7zaIDZdn-6h z!5GwE+H1`|YPRuKY1d0C?YJFAz=#p$R2W@s?624u;ug0_dEC6i}sgJ2|=K8RaVy>Zso;ZbK5!&cW zUE`*_JIl5vcYqy#xYT{F>rJp+yM@VD>_);&$Sw!|^-2cx^Y{u80{Q?E|8WARJRP4tsQVoPl5Q3*PDb& zHOeZuHX@bAb4CR!Cz`&UDt4FGGcfXLEGzRqD#gdd6|2VKb3vxBQGMF;2(qpng$-Q@ z3kKh*ZkS0RSN)dNqTY+g^VQY)*858$eLg<3cx4!P=vLrGsDRgYb6Dln=Y^X)S4)`l z1k&tcibp64u12Zc(V_gxzn?(76x>!`nO3NY z5i~E+?)?w#I&*&8#YiK<+n%G-zB9)^CZ)V;Va<8Y_IK^JGk$>K-O9t-hhP>K1ovvI 
z!{X8Cc2MB4LgSfbWEsVz)s5n_(l2x`UbHZ`Ti1qwrEtu(jTij^9h2TOL~O6SRw7%~ zf`}<%zt^$~dZi`oqk^QbxMbq4@8z^@^R&85*z>^-QxZ_+gIn+f_Npk$`np6?xqE_O z1ALn^IY?~^p0jdK8ePMlL@3Ue6K+%3ceTVjQ^Rq}W&*}<%4hsAhuh)E48)iGMA#i6AmZ|(K(x&0vCscH=C7x8%5g)0i zZv5WKg4K9O#k|8PX4=p-cJlKwsoY`ywAe_jDuwCRVAGP|w`bPZz*;OT<}&p)(1L64T3R!-0?+r?pocjO!V|?wVo8~o6=K;l$?nN zxn|AcrbNMf%Jn$Dz2*>{b#mM0ItgjE22)w!^PR1nC~f-QS##sL1mSKvtp;a@RiUtS ze6jSf)2!Vvx6~`55gT>(T-oUet^Lz$B}I#XKhiwi&F2-75jV392zz^no~xX9&8-5= z@1VU6KeEPQkos#FBW6>4f58}19fC4>(8lW=+v%^YE8N|7`>}#XCn_^f+_E#hN{xZqK^!@3!LCLFSfM#OxU9>p)WJO;qj01e{G^* zs~vKSz6tEGQsCr74kSgsK?a!(^hMjqO@~q2|4yGv&{l=~!B0Q%$8wUMSZv{MOm!gH ze_L@f^aNVP=3m4rfBE>w&tjdMV0=tYh>v^@B1f;nAyQd&>e&4t>#0_Rfto@ghjXEa z;L$xZT4#JFciTRG`*bs}>^hMifQXUgu*^et6(4ae%_Ow2YITc!tFSWO7kc?W{h$v&Q}D zBas1)QoybM_2t`7UJAcInaB{O>h0$RGa&fJAC5?ieOk|uD;?!IQ*T|TfMnHc>{~zN zN>OC5xUAWs??^z%nvBM*23Nv~Y{i*rF-N5p!=g1>v$HpQ&p0BUDXj|hI*pWZra^eP zt16W24rDjKq`U(&e-mvcw!69JB?=>bPs+hwQ4wS19@}Ki`M!d3tl<>?DxJ8PbI2-Q z+4q#GGSNK{E|_F_xA`4k^@Q?UOJuT=Z|`a%ccZDOb@u0>u9S6ILO*(26Z5snTe?mV z-y50FLYMUYrYPmnBR3DPWT?AJ|M0torQF^um|oxY%vClM{a)<2 zV^f#L02gR)<>iT;l!$%I8GC2n&D|WoKpaVeBg@&O*9&>>S{3kguqdvdICo~q1{Ebj zp(w>qCPSt1wE}(}eFwkPp{9>6JEid_UUBov#mTg~(B|@m+Kw_+rjG>1BmkcGFmt`Y zak4_nsJHmSyJ7&;@3ZwIyxd?3fobaj?OR+bBm3NZ8ELj=La%QFlWjXK-+WOxEH+Y( z7AQCJjGH$4Q4wbo^QE_(U-*aq+lj$Oc~R>??sN&${qF_Mn*(x>#^X1Tm;LDMClkhM zU9uoIN<3Y-M(QGqmPfOG#3|@cD9$vL^>cg=`NX<#=N4YW^&>ztl;TDc+k()ZM+QpM zwF(#)BxJSgDqx47Iu1&d5qmn$F_y~XC&CwUdC2mEKzQ-`rQ8;(b_dxT4J;)NPY?mx<<STpD5Cb^-G9*0lsmzZJ(OnH2dWDgH548X1w7B`AkH*#?VEd?}v$woQZ3z zsjNYhmlN+LZOyj}SU{F_+~vM9y_6oQzzJcVp5F3fa^Q)YB(5)5;|%lsREMf-j5DxF zBz`+%9XgX8W5x8M@TpG@)vR^I=;Oz#LnD;c=yCbJDvf{({4pP~46Kx9Wi5rDf5JqI)-u5_+X}A(dO6nvQ`!niP>0|1O-bXfW zz2ZYkNwOR9p#c#ju605{O%XkH5ji23Z1o92#)Z*^yCm#w7 zCfiNSTNsw-5E8nsTmQ~t{Ws>{!$tCpqw>n%m1O{P+-D{%3u;620^>D z1IEWO*Hk5c$C`|B*VY8BcrKu+M)s$^a7FYuM{vjRzjUvN-QXWPP_lF1zq;e;ioAS5 z!yW7SLG7cmOV$R>;D!V>ilRqa#HfQpE@GXdy!`jdU>r_H2hh5bHYCAj*`}t0mo$B3 
zGe8XtJ7##b(#CG+&(;o0(0cbZ0WTon{Y!_+KDv*A$G*|vqegIWAN?BY(;4fVm_fvpb>3^f6qhHvRc4$^S@s_n%mh@TFzsOxU0L_9x^ips15# zrA3s`Zi}(1<>}@`RL)^|KgKlx|KirVWPtrzF47x@tSDjGK|^yMKyxNJXemmxFG_tf z12-k~J~>=40k(wMP4UH3pwWt$77Cr%@dQ^lw%C#V!pu7|q@Gl(feoJX=1R%PdH!LV zor1cDc}h(wDbGdbJh-`!raeE97LCA*>2RL&J&-F+wIXYJgwaCUkz3v1u;DAprx|x- zB3<3Ny1=PIPrR3=9+Gg z!}Hf=c!@Ud%c(P=fazX^uz-g41DeWhaF5FHc4d(`8vw>q$dfqy0%Q4dkMZ#J0Fd|k zf3r#R8~)Ng7b3q?s{B%CDpgh(c+Gcp_*(p-NkRStF8rkq{3S)P)Fc3!2!IX(=pm4p zkG@@gm-Q<_0o|b>pp1_Fr9b?oCHzgO@(VGvPWnx-A}d-8D5MqtP$^PaM~rv^p4E_K zqY+@jg!eo8ed`Y`BQX8?9ijY%RQ~^SXf*}dJ-0*n*(Lj*c&FMqdUfNk*K95Xn*b^J zKO2<&f7E*ZXUt$MW4lCQx4tmZniY`v%X}`ryE|XfmU42}6rrbD>oU@9L6F`TGY(V4 za}rM{^<(^gFAkt==~P4oNzvZCcy`6zsf$s#5MWNRC=$Z~JOC_hfCqo=<7RpWh{*v{7&l*1MLeH_<7futw@3g1 zM4t=&#EZ}K`)#i((?3V@`b#8@O2pK~Yna{zFv&*ZQcdLn-$m*3j860+`$jXM5&7+K zrj^)VI{{3^OQOvek4rw=j_7hJ|u;3>i@({Q< zS!Wu*-Kyq)+^SP*pkH47uXFl6e(rCf6i1Q&CY1Pph5FaK{#&TczlO^DJsGP0T{8S) zJ^hgkf2Y;oY4wi`{a>3_()_KG9%02zYzCXjIl+#H4h%Aa7vHSG*+@b*gLL zHEjt(Sn<1xyo+vW{$>mN|D|urAFg2kyS@IuF?RU(z92+<%xzCcr0<4i7=Sa}V7cph zNSn|M2xnCP1##%5cWL)dJe|rIQXfiNzm%%uACR3F+v7RECW$`37()Jdri|lm;sm;8 zLSP_hp49k>CvcnK+gb@AaD6@Ao_@%>i99)j{KQ)hWWgk3<1bz_!GTf5P8L`CflU5W z!@&imDFyOd0D$kKw|RczCEh}|0_9szZ@M*~odcwd{nnQQP literal 0 HcmV?d00001 diff --git a/src/python/7.AdaBoost/adaboost.py b/src/python/7.AdaBoost/adaboost.py index 9bd19538..1672f9bd 100644 --- a/src/python/7.AdaBoost/adaboost.py +++ b/src/python/7.AdaBoost/adaboost.py @@ -43,9 +43,9 @@ def stumpClassify(dataMat, dimen, threshVal, threshIneq): """stumpClassify(将数据集,按照feature列的value进行 二分法切分比较来赋值分类) Args: - dataMat Matrix数据集 - dimen 特征列 - threshVal 特征列要比较的值 + dataMat Matrix数据集 + dimen 特征列 + threshVal 特征列要比较的值 Returns: retArray 结果集 """ @@ -110,11 +110,11 @@ def buildStump(dataArr, labelArr, D): # 例如: 一个都没错,那么错误率= 0.2*0=0 , 5个都错,那么错误率= 0.2*5=1, 只错3个,那么错误率= 0.2*3=0.6 weightedError = D.T*errArr ''' - dim 表示 feature列 - threshVal 表示树的分界值 - inequal 表示计算树左右颠倒的错误率的情况 - weightedError 表示整体结果的错误率 - bestClasEst 预测的最优结果 + 
dim 表示 feature列 + threshVal 表示树的分界值 + inequal 表示计算树左右颠倒的错误率的情况 + weightedError 表示整体结果的错误率 + bestClasEst 预测的最优结果 ''' # print "split: dim %d, thresh %.2f, thresh ineqal: %s, the weighted error is %.3f" % (i, threshVal, inequal, weightedError) if weightedError < minError: @@ -155,7 +155,7 @@ def adaBoostTrainDS(dataArr, labelArr, numIt=40): # store Stump Params in Array weakClassArr.append(bestStump) - # print "alpha=%s, classEst=%s, bestStump=%s, error=%s " % (alpha, classEst.T, bestStump, error) + print "alpha=%s, classEst=%s, bestStump=%s, error=%s " % (alpha, classEst.T, bestStump, error) # -1主要是下面求e的-alpha次方; 如果判断正确,乘积为1,否则成绩为-1,这样就可以算出分类的情况了 expon = multiply(-1*alpha*mat(labelArr).T, classEst) print '\n' @@ -207,8 +207,11 @@ def plotROC(predStrengths, classLabels): Args: predStrengths 最终预测结果的权重值 - classLabels 原始数据的分类结果集 + classLabels 原始数据的分类结果集 """ + print 'predStrengths=', predStrengths + print 'classLabels=', classLabels + import matplotlib.pyplot as plt # variable to calculate AUC ySum = 0.0 @@ -221,6 +224,8 @@ def plotROC(predStrengths, classLabels): # argsort函数返回的是数组值从小到大的索引值 # get sorted index, it's reverse sortedIndicies = predStrengths.argsort() + # 测试结果是否是从小到大排列 + print 'sortedIndicies=', sortedIndicies, predStrengths[0, 176], predStrengths.min(), predStrengths[0, 293], predStrengths.max() # 开始创建模版对象 fig = plt.figure() @@ -239,7 +244,7 @@ def plotROC(predStrengths, classLabels): ySum += cur[1] # draw line from cur to (cur[0]-delX, cur[1]-delY) # 画点连线 (x1, x2, y1, y2) - # print cur[0], cur[0]-delX, cur[1], cur[1]-delY + print cur[0], cur[0]-delX, cur[1], cur[1]-delY ax.plot([cur[0], cur[0]-delX], [cur[1], cur[1]-delY], c='b') cur = (cur[0]-delX, cur[1]-delY) # 画对角的虚线线 @@ -260,47 +265,46 @@ def plotROC(predStrengths, classLabels): if __name__ == "__main__": - # 我们要将5个点进行分类 - dataArr, labelArr = loadSimpData() - print 'dataArr', dataArr, 'labelArr', labelArr + # # 我们要将5个点进行分类 + # dataArr, labelArr = loadSimpData() + # print 'dataArr', dataArr, 'labelArr', 
labelArr - # D表示最初值,对1进行均分为5份,平均每一个初始的概率都为0.2 - # D的目的是为了计算错误概率: weightedError = D.T*errArr - D = mat(ones((5, 1))/5) - print 'D=', D.T + # # D表示最初值,对1进行均分为5份,平均每一个初始的概率都为0.2 + # # D的目的是为了计算错误概率: weightedError = D.T*errArr + # D = mat(ones((5, 1))/5) + # print 'D=', D.T - # bestStump, minError, bestClasEst = buildStump(dataArr, labelArr, D) - # print 'bestStump=', bestStump - # print 'minError=', minError - # print 'bestClasEst=', bestClasEst.T + # # bestStump, minError, bestClasEst = buildStump(dataArr, labelArr, D) + # # print 'bestStump=', bestStump + # # print 'minError=', minError + # # print 'bestClasEst=', bestClasEst.T + # # 分类器:weakClassArr + # # 历史累计的分类结果集 + # weakClassArr, aggClassEst = adaBoostTrainDS(dataArr, labelArr, 9) + # print '\nweakClassArr=', weakClassArr, '\naggClassEst=', aggClassEst.T - # 分类器:weakClassArr - # 历史累计的分类结果集 - weakClassArr, aggClassEst = adaBoostTrainDS(dataArr, labelArr, 9) - print '\nweakClassArr=', weakClassArr, '\naggClassEst=', aggClassEst.T + # """ + # 发现: + # 分类的权重值:最大的值,为alpha的加和,最小值为-最大值 + # 特征的权重值:如果一个值误判的几率越小,那么D的特征权重越少 + # """ - """ - 发现: - 分类的权重值:最大的值,为alpha的加和,最小值为-最大值 - 特征的权重值:如果一个值误判的几率越小,那么D的特征权重越少 - """ + # # 测试数据的分类结果, 观测:aggClassEst分类的最终权重 + # print adaClassify([0, 0], weakClassArr).T + # print adaClassify([[5, 5], [0, 0]], weakClassArr).T - # 测试数据的分类结果, 观测:aggClassEst分类的最终权重 - print adaClassify([0, 0], weakClassArr).T - print adaClassify([[5, 5], [0, 0]], weakClassArr).T - - # # 马疝病数据集 - # # 训练集合 - # dataArr, labelArr = loadDataSet("input/7.AdaBoost/horseColicTraining2.txt") - # weakClassArr, aggClassEst = adaBoostTrainDS(dataArr, labelArr, 40) - # print weakClassArr, '\n-----\n', aggClassEst.T - # # 计算ROC下面的AUC的面积大小 - # plotROC(aggClassEst.T, labelArr) - # # 测试集合 - # dataArrTest, labelArrTest = loadDataSet("input/7.AdaBoost/horseColicTest2.txt") - # m = shape(dataArrTest)[0] - # predicting10 = adaClassify(dataArrTest, weakClassArr) - # errArr = mat(ones((m, 1))) - # # 测试:计算总样本数,错误样本数,错误率 - # print m, 
errArr[predicting10 != mat(labelArrTest).T].sum(), errArr[predicting10 != mat(labelArrTest).T].sum()/m + # 马疝病数据集 + # 训练集合 + dataArr, labelArr = loadDataSet("input/7.AdaBoost/horseColicTraining2.txt") + weakClassArr, aggClassEst = adaBoostTrainDS(dataArr, labelArr, 40) + print weakClassArr, '\n-----\n', aggClassEst.T + # 计算ROC下面的AUC的面积大小 + plotROC(aggClassEst.T, labelArr) + # 测试集合 + dataArrTest, labelArrTest = loadDataSet("input/7.AdaBoost/horseColicTest2.txt") + m = shape(dataArrTest)[0] + predicting10 = adaClassify(dataArrTest, weakClassArr) + errArr = mat(ones((m, 1))) + # 测试:计算总样本数,错误样本数,错误率 + print m, errArr[predicting10 != mat(labelArrTest).T].sum(), errArr[predicting10 != mat(labelArrTest).T].sum()/m diff --git a/src/python/7.AdaBoost/sklearn-adaboost-demo.py b/src/python/7.AdaBoost/sklearn-adaboost-demo.py index f313aefe..c685d9c9 100644 --- a/src/python/7.AdaBoost/sklearn-adaboost-demo.py +++ b/src/python/7.AdaBoost/sklearn-adaboost-demo.py @@ -4,7 +4,7 @@ """ Created on 2017-07-10 Updated on 2017-07-10 -@author: 片刻/Noel Dawe +@author: 片刻/Noel Dawe 《机器学习实战》更新地址:https://github.com/apachecn/MachineLearning sklearn-AdaBoost译文链接: http://cwiki.apachecn.org/pages/viewpage.action?pageId=10813457 """ From 82fc610895a707b7809f1dab1923840916cb4521 Mon Sep 17 00:00:00 2001 From: jiangzhonglian Date: Thu, 10 Aug 2017 12:10:08 +0800 Subject: [PATCH 08/10] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E6=8E=A8=E8=8D=90?= =?UTF-8?q?=E7=B3=BB=E7=BB=9F=E7=9A=84=E9=83=A8=E5=88=86=E6=96=87=E6=A1=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/16.推荐系统.md | 47 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/docs/16.推荐系统.md b/docs/16.推荐系统.md index 7e5df78d..1372af85 100644 --- a/docs/16.推荐系统.md +++ b/docs/16.推荐系统.md @@ -1,5 +1,42 @@ # 第16章 推荐系统 +## 背景与挖掘目标 + +随着互联网的快速发展,用户很难快速从海量信息中寻找到自己感兴趣的信息。因此诞生了:搜索引擎+推荐系统 + +本章节-推荐系统: + +1. 帮助用户发现其感兴趣和可能感兴趣的信息。 +2. 让网站价值信息脱颖而出,得到广大用户的认可。 +3. 
提高用户对网站的忠诚度和关注度,建立稳固用户群体。 + +## 分析方法与过程 + +本案例的目标是对用户进行推荐,即以一定的方式将用户与物品(本次指网页)之间建立联系。 + +由于用户访问网站的数据记录很多,如果不对数据进行分类处理,对所有的记录直接采用推荐系统进行推荐,这样会存在以下问题。 + +1. 数据量太大意味着物品数与用户数很多,在模型构建用户与物品稀疏矩阵时,出现设备内存空间不够的情况,并且模型计算需要消耗大量的时间。 +2. 用户区别很大,不同的用户关注的信息不一样,因此,即使能够得到推荐结果,其效果也会不好。 + +为了避免出现上述问题,需要进行分类处理与分析。 + +正常的情况下,需要对用户的兴趣爱好以及需求进行分类。 +因为在用户访问记录中,没有记录用户访问页面时间的长短,因此不容易判断用户兴趣爱好。 +因此,本文根据用户浏览的网页信息进行分析处理,主要采用以下方法处理:以用户浏览网页的类型进行分类,然后对每个类型中的内容进行推荐。 + +分析过程如下: + +* 从系统中获取用户访问网站的原始记录。 +* 对数据进行多维分析,包括用户访问内容,流失用户分析以及用户分类等分析。 +* 对数据进行预处理,包含数据去重、数据变换和数据分类等处理过程。 +* 以用户访问html后缀的页面为关键条件,对数据进行处理。 +* 对比多种推荐算法进行推荐,通过模型评价,得到比较好的智能推荐模型。通过模型对样本数据进行预测,获得推荐结果。 + + + +## 主流推荐算法 + | 推荐方法 | 描述 | | --- | --- | | 基于内容推荐 | | @@ -11,13 +48,13 @@ ![推荐方法对比](/images/16.RecommendedSystem/推荐方法对比.png) -## 基于知识推荐 +### 基于知识推荐 基于知识的推荐(Knowledge-based Recommendation)在某种程度是可以看成是一种推理(Inference)技术,它不是建立在用户需要和偏好基础上推荐的。基于知识的方法因它们所用的功能知识不同而有明显区别。效用知识(Functional Knowledge)是一种关于一个项目如何满足某一特定用户的知识,因此能解释需要和推荐的关系,所以用户资料可以是任何能支持推理的知识结构,它可以是用户已经规范化的查询,也可以是一个更详细的用户需要的表示。 ![基于知识的推荐](/images/16.RecommendedSystem/基于知识的推荐.jpg) -## 协同过滤推荐 +### 协同过滤推荐 * memory-based推荐 * Item-based方法 @@ -30,6 +67,12 @@ * 矩阵分解背后的核心思想,找到两个矩阵,它们相乘之后得到的那个矩阵的值,与评分矩阵R中有值的位置中的值尽可能接近。这样一来,分解出来的两个矩阵相乘就尽可能还原了评分矩阵R,因为有值的地方,值都相差得尽可能地小,那么missing的值通过这样的方式计算得到,比较符合趋势。 * 协同过滤中主要存在如下两个问题:稀疏性与冷启动问题。已有的方案通常会通过引入多个不同的数据源或者辅助信息(Side information)来解决这些问题,用户的Side information可以是用户的基本个人信息、用户画像信息等,而Item的Side information可以是物品的content信息等。 +## 效果评估 + +1. 召回率和准确率 【人为统计分析】 +2. F值(P-R曲线) 【偏重:非均衡问题】 +3. 
ROC和AUC 【偏重:不同结果的对比】 + * * * * **作者:[片刻](http://www.apache.wiki/display/~jiangzhonglian)** From 48ce677e3d77035dbf940feda74f6c31752d7ed9 Mon Sep 17 00:00:00 2001 From: jiangzhonglian Date: Mon, 14 Aug 2017 15:30:44 +0800 Subject: [PATCH 09/10] =?UTF-8?q?=E6=9B=B4=E6=96=B0=20AdaBoost=20=E9=83=A8?= =?UTF-8?q?=E5=88=86=E7=9A=84=E6=B3=A8=E8=A7=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/6.支持向量机.md | 8 ++--- docs/7.1.利用AdaBoost元算法提高分类.md | 44 ++++++++++++++------------ docs/7.2.随机森林的使用.md | 2 +- src/python/7.AdaBoost/adaboost.py | 6 ++-- 4 files changed, 32 insertions(+), 28 deletions(-) diff --git a/docs/6.支持向量机.md b/docs/6.支持向量机.md index 097161d0..e36074ba 100644 --- a/docs/6.支持向量机.md +++ b/docs/6.支持向量机.md @@ -15,7 +15,7 @@ * 注意:`SVM几何含义比较直观,但其算法实现较复杂,牵扯大量数学公式的推导。` ``` -优点:泛化错误率低,计算开销不大,结果易理解。 +优点:泛化(由具体的、个别的扩大为一般的,就是说:模型训练完后的新样本)错误率低,计算开销不大,结果易理解。 缺点:对参数调节和核函数的选择敏感,原始分类器不加修改仅适合于处理二分类问题。 使用数据类型:数值型和标称型数据。 ``` @@ -47,13 +47,13 @@ This is the simplest kind of SVM (Called an LSVM) Support Vectors are those data 1. 直觉上是安全的 2. 如果我们在边界的位置发生了一个小错误(它在垂直方向上被颠倒),这给我们最小的错误分类机会。 -3. CV很容易,因为该模型对任何非支持向量数据点的去除是免疫的。 +3. CV(Cross Validation 交叉验证,此处指留一交叉验证 LOOCV)很容易,因为该模型对任何非支持向量数据点的去除是免疫的。 4. 有一些理论,这是一件好事。 5. 通常它的工作非常好。 ``` * 选择D会比B、C分隔的效果要好很多,原因是上述的5个结论。 -* 所有的点看作地雷吧,那么我们(超平面)得找到最近所有的地雷,并保证我们离它最远。 +* 如果把所有的点看作地雷,那么我们(超平面)得找到最近所有的地雷,并保证我们离它最远。 ![线性可分](/images/6.SVM/SVM_3_linearly-separable.jpg) ### 怎么寻找最大间隔 @@ -70,7 +70,7 @@ This is the simplest kind of SVM (Called an LSVM) Support Vectors are those data * 类别标签用-1、1,是为了后期方便 \\(lable*(w^Tx+b)\\) 的标识和距离计算;如果 \\(lable*(w^Tx+b)>0\\) 表示预测正确,否则预测错误。 * 现在目标很明确,就是要找到`w`和`b`,因此我们必须要找到最小间隔的数据点,也就是前面所说的`支持向量`。 * 也就说,让最小的距离取最大.(最小的距离:就是最小间隔的数据点;最大:就是最大间距,为了找出最优超平面--最终就是支持向量) - * 怎么理解呢? 例如: 所有的点看作地雷吧,那么我们(超平面)得找到最近所有的地雷,并保证我们离它最远。 + * 怎么理解呢? 
例如: 如果把所有的点看作地雷,那么我们(超平面)得找到最近所有的地雷,并保证我们离它最远。 * 目标函数:\\(arg: max\ \{min\ [lable*(w^Tx+b)/||w||]\}\\) * 1.如果 \\(lable*(w^Tx+b)>0\\) 表示预测正确,也称`函数间隔`,\\(||w||\\) 可以理解为归一化,也称`几何间隔`,我们始终可以找到一个阈值让 \\(lable*(w^Tx+b)>=1\\) * 2.所以令 \\(lable*(w^Tx+b)=1\\),我们本质上是求 \\(arg: max\{关于w, b\}\ (1/||w||)\\);也就说,我们约束(前提)条件是: \\(lable*(w^Tx+b)=1\\) diff --git a/docs/7.1.利用AdaBoost元算法提高分类.md b/docs/7.1.利用AdaBoost元算法提高分类.md index 75f705c9..a4dca768 100644 --- a/docs/7.1.利用AdaBoost元算法提高分类.md +++ b/docs/7.1.利用AdaBoost元算法提高分类.md @@ -1,4 +1,4 @@ -# 第7章 利用AdaBoost元算法提高分类 +# 第7.1章 利用AdaBoost元算法提高分类 ![利用AdaBoost元算法提高分类](/images/7.AdaBoost/adaboost_headPage.jpg "利用AdaBoost元算法提高分类") @@ -9,7 +9,7 @@ * 概念:是对其他算法进行组合的一种形式。 * 通俗来说: 当做重要决定时,大家可能都会考虑吸取多个专家而不只是一个人的意见。 机器学习处理问题时又何尝不是如此? 这就是元算法(meta-algorithm)背后的思想。 -* 集成方法: 1. 投票选举 2. 再学习 +* 集成方法: 1. 投票选举(bagging) 2. 再学习(boosting) > bagging:基于数据随机重抽样的分类器构造方法 @@ -18,41 +18,45 @@ 2. 每个数据集都是通过在原始数据集中随机选择一个样本来进行替换(替换:意味着可以多次选择同一个样本,也就有重复值)而得到的。 3. 该算法作用的数据集就会得到S个分类器,与此同时,选择分类器投票结果中最多的类别作为最后的分类结果。 4. 例如:随机森林(random forest) -* 追美女:美女选择择偶对象的时候,会问几个闺蜜的建议,最后选择一个综合得分最高的一个作为男朋友 +* 选帅哥:美女选择择偶对象的时候,会问几个闺蜜的建议,最后选择一个综合得分最高的一个作为男朋友 -> boosting +> boosting:是基于所有分类器的加权求和的方法 * boosting是一种与bagging很类似的技术。 * 不过boosting分类的结果是基于所有分类器的加权求和结果的。不论是boosting还是bagging当中,所使用的多个分类器的类型都是一致的。 -* 区别是什么? - 1. bagging:不同的分类器是通过串形训练而获得的,每个新分类器斗根据已训练出的分类器的性能来进行训练。 - 2. boosting:是通过集中关注被已有分类器错分的那些数据来获得新的分类器。 - 3. 由于boosting分类的结果是基于所有分类器的加权求和结果的,因此boosting与bagging不太一样。 - 4. bagging中的分类器权重是相等的,而boosting中的分类器权重并不相等,每个权重代表的是其对应分类器在上一轮迭代中的成功度。 * 目前boosting方法最流行的版本是: AdaBoost。 * 追美女:第1个帅哥失败->(传授经验:姓名、家庭情况) 第2个帅哥失败->(传授经验:兴趣爱好、性格特点) 第3个帅哥成功 -## 应用AdaBoost算法 +> bagging 和 boosting 区别是什么? + +1. bagging:不同的分类器是在各自重抽样得到的数据集上相互独立训练而获得的;boosting:不同的分类器是通过串行训练而获得的,每个新分类器都根据已训练出的分类器的性能来进行训练。 +2. boosting:是通过集中关注被已有分类器错分的那些数据来获得新的分类器。 +3. 由于 boosting 分类的结果是基于所有分类器的加权求和结果的,因此 boosting 与 bagging 不太一样。 +4. 
bagging 中的分类器权重是相等的,而 boosting 中的分类器权重并不相等,每个权重代表的是其对应分类器在上一轮迭代中的成功度。 + +## 应用 AdaBoost 算法 > AdaBoost(adaptive boosting: 自适应boosting) ``` 能否使用弱分类器和多个实例来构建一个强分类器? 这是一个非常有趣的理论问题。 -* 优点:泛化错误率低,易编码,可以应用在大部分分类器上,无参数调节。 +* 优点:泛化(由具体的、个别的扩大为一般的,就是说:模型训练完后的新样本)错误率低,易编码,可以应用在大部分分类器上,无参数调节。 * 缺点:对离群点敏感。 * 适用数据类型:数值型和标称型数据。 ``` -> AdaBoost的一般流程 +> AdaBoost 的一般流程 ``` 收集数据:可以使用任意方法 -准备数据:依赖于所使用的弱分类器类型,本章使用的是单层决策树,这种分类器可以处理任何数据类型。当然也可以使用任意分类器作为弱分类器,第2章到第6章中的任一分类器都可以充当弱分类器。作为弱分类器,简单分类器的效果更好。 +准备数据:依赖于所使用的弱分类器类型,本章使用的是单层决策树,这种分类器可以处理任何数据类型。 + 当然也可以使用任意分类器作为弱分类器,第2章到第6章中的任一分类器都可以充当弱分类器。 + 作为弱分类器,简单分类器的效果更好。 分析数据:可以使用任意方法。 -训练数据:AdaBoost的大部分时间都用在训练上,分类器将多次在同一数据集上训练弱分类器。 +训练数据:AdaBoost 的大部分时间都用在训练上,分类器将多次在同一数据集上训练弱分类器。 测试数据:计算分类的错误率。 -使用算法:通SVM一样,AdaBoost预测两个类别中的一个。如果想把它应用到多个类别的场景,那么就要像多类SVM中的做法一样对AdaBoost进行修改。 +使用算法:同 SVM 一样,AdaBoost 预测两个类别中的一个。如果想把它应用到多个类别的场景,那么就要像多类 SVM 中的做法一样对 AdaBoost 进行修改。 ``` * 训练算法: 基于错误提升分类器的性能 @@ -70,15 +74,15 @@ ``` 发现: -alpha目的主要是计算每一个分类器实例的权重(组合就是分类结果) - 分类的权重值:最大的值=alpha的加和,最小值=-最大值 -D的目的是为了计算错误概率: weightedError = D.T*errArr,求最佳分类器 - 特征的权重值:如果一个值误判的几率越小,那么D的特征权重越少 +alpha 目的主要是计算每一个分类器实例的权重(组合就是分类结果) + 分类的权重值:最大的值= alpha 的加和,最小值=-最大值 +D 的目的是为了计算错误概率: weightedError = D.T*errArr,求最佳分类器 + 特征的权重值:如果一个值误判的几率越小,那么 D 的特征权重越少 ``` ![AdaBoost算法权重计算公式](/images/7.AdaBoost/adaboost_alpha.png "AdaBoost算法权重计算公式") -## 完整AdaBoost算法的实现 +## 完整 AdaBoost 算法的实现 整个实现的伪代码如下: diff --git a/docs/7.2.随机森林的使用.md b/docs/7.2.随机森林的使用.md index 5693ed65..04b07c4e 100644 --- a/docs/7.2.随机森林的使用.md +++ b/docs/7.2.随机森林的使用.md @@ -1,4 +1,4 @@ -# 第7章 随机森林的使用(个人补充,非课本内容) +# 第7.2章 随机森林的使用(个人补充,非课本内容) ## 基本介绍 diff --git a/src/python/7.AdaBoost/adaboost.py b/src/python/7.AdaBoost/adaboost.py index 1672f9bd..52effab7 100644 --- a/src/python/7.AdaBoost/adaboost.py +++ b/src/python/7.AdaBoost/adaboost.py @@ -257,9 +257,9 @@ def plotROC(predStrengths, classLabels): plt.show() ''' 参考说明:http://blog.csdn.net/wenyusuran/article/details/39056013 - 为了计算AUC,我们需要对多个小矩形的面积进行累加。这些小矩形的宽度是xStep,因此 - 
可以先对所有矩形的高度进行累加,最后再乘以xStep得到其总面积。所有高度的和(ySum)随 - 着x轴的每次移动而渐次增加。 + 为了计算 AUC ,我们需要对多个小矩形的面积进行累加。 + 这些小矩形的宽度是xStep,因此可以先对所有矩形的高度进行累加,最后再乘以xStep得到其总面积。 + 所有高度的和(ySum)随着x轴的每次移动而渐次增加。 ''' print "the Area Under the Curve is: ", ySum*xStep From af6229ceadede6dcfc7d735d35221248ce931723 Mon Sep 17 00:00:00 2001 From: jiangzhonglian Date: Tue, 15 Aug 2017 18:31:34 +0800 Subject: [PATCH 10/10] =?UTF-8?q?=E6=9B=B4=E6=96=B0=20adaboost=20sklearn?= =?UTF-8?q?=E6=B5=8B=E8=AF=95=E6=A1=88=E4=BE=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- input/7.AdaBoost/horseColicTest2.libsvm | 67 +++++++++++ .../7.AdaBoost/sklearn-adaboost-demo.py | 106 +++++++----------- tools/python2libsvm.py | 52 +++++++++ 3 files changed, 159 insertions(+), 66 deletions(-) create mode 100644 input/7.AdaBoost/horseColicTest2.libsvm create mode 100644 tools/python2libsvm.py diff --git a/input/7.AdaBoost/horseColicTest2.libsvm b/input/7.AdaBoost/horseColicTest2.libsvm new file mode 100644 index 00000000..d2661d63 --- /dev/null +++ b/input/7.AdaBoost/horseColicTest2.libsvm @@ -0,0 +1,67 @@ +1 0:2 1:1 2:38.5 3:54 4:20 5:0 6:1 7:2 8:2 9:3 10:4 11:1 12:2 13:2 14:5.9 15:0 16:2 17:42 18:6.3 19:0 20:0 +1 0:2 1:1 2:37.6 3:48 4:36 5:0 6:0 7:1 8:1 9:0 10:3 11:0 12:0 13:0 14:0 15:0 16:0 17:44 18:6.3 19:1 20:5 +1 0:1 1:1 2:37.7 3:44 4:28 5:0 6:4 7:3 8:2 9:5 10:4 11:4 12:1 13:1 14:0 15:3 16:5 17:45 18:70 19:3 20:2 +-1 0:1 1:1 2:37 3:56 4:24 5:3 6:1 7:4 8:2 9:4 10:4 11:3 12:1 13:1 14:0 15:0 16:0 17:35 18:61 19:3 20:2 +1 0:2 1:1 2:38 3:42 4:12 5:3 6:0 7:3 8:1 9:1 10:0 11:1 12:0 13:0 14:0 15:0 16:2 17:37 18:5.8 19:0 20:0 +1 0:1 1:1 2:0 3:60 4:40 5:3 6:0 7:1 8:1 9:0 10:4 11:0 12:3 13:2 14:0 15:0 16:5 17:42 18:72 19:0 20:0 +1 0:2 1:1 2:38.4 3:80 4:60 5:3 6:2 7:2 8:1 9:3 10:2 11:1 12:2 13:2 14:0 15:1 16:1 17:54 18:6.9 19:0 20:0 +1 0:2 1:1 2:37.8 3:48 4:12 5:2 6:1 7:2 8:1 9:3 10:0 11:1 12:2 13:0 14:0 15:2 16:0 17:48 18:7.3 19:1 20:0 +1 0:2 1:1 2:37.9 3:45 4:36 5:3 6:3 7:3 8:2 9:2 
10:3 11:1 12:2 13:1 14:0 15:3 16:0 17:33 18:5.7 19:3 20:0 +-1 0:2 1:1 2:39 3:84 4:12 5:3 6:1 7:5 8:1 9:2 10:4 11:2 12:1 13:2 14:7 15:0 16:4 17:62 18:5.9 19:2 20:2.2 +1 0:2 1:1 2:38.2 3:60 4:24 5:3 6:1 7:3 8:2 9:3 10:3 11:2 12:3 13:3 14:0 15:4 16:4 17:53 18:7.5 19:2 20:1.4 +-1 0:1 1:1 2:0 3:140 4:0 5:0 6:0 7:4 8:2 9:5 10:4 11:4 12:1 13:1 14:0 15:0 16:5 17:30 18:69 19:0 20:0 +-1 0:1 1:1 2:37.9 3:120 4:60 5:3 6:3 7:3 8:1 9:5 10:4 11:4 12:2 13:2 14:7.5 15:4 16:5 17:52 18:6.6 19:3 20:1.8 +1 0:2 1:1 2:38 3:72 4:36 5:1 6:1 7:3 8:1 9:3 10:0 11:2 12:2 13:1 14:0 15:3 16:5 17:38 18:6.8 19:2 20:2 +1 0:2 1:9 2:38 3:92 4:28 5:1 6:1 7:2 8:1 9:1 10:3 11:2 12:3 13:0 14:7.2 15:0 16:0 17:37 18:6.1 19:1 20:1.1 +1 0:1 1:1 2:38.3 3:66 4:30 5:2 6:3 7:1 8:1 9:2 10:4 11:3 12:3 13:2 14:8.5 15:4 16:5 17:37 18:6 19:0 20:0 +1 0:2 1:1 2:37.5 3:48 4:24 5:3 6:1 7:1 8:1 9:2 10:1 11:0 12:1 13:1 14:0 15:3 16:2 17:43 18:6 19:1 20:2.8 +-1 0:1 1:1 2:37.5 3:88 4:20 5:2 6:3 7:3 8:1 9:4 10:3 11:3 12:0 13:0 14:0 15:0 16:0 17:35 18:6.4 19:1 20:0 +-1 0:2 1:9 2:0 3:150 4:60 5:4 6:4 7:4 8:2 9:5 10:4 11:4 12:0 13:0 14:0 15:0 16:0 17:0 18:0 19:0 20:0 +-1 0:1 1:1 2:39.7 3:100 4:30 5:0 6:0 7:6 8:2 9:4 10:4 11:3 12:1 13:0 14:0 15:4 16:5 17:65 18:75 19:0 20:0 +1 0:1 1:1 2:38.3 3:80 4:0 5:3 6:3 7:4 8:2 9:5 10:4 11:3 12:2 13:1 14:0 15:4 16:4 17:45 18:7.5 19:2 20:4.6 +1 0:2 1:1 2:37.5 3:40 4:32 5:3 6:1 7:3 8:1 9:3 10:2 11:3 12:2 13:1 14:0 15:0 16:5 17:32 18:6.4 19:1 20:1.1 +-1 0:1 1:1 2:38.4 3:84 4:30 5:3 6:1 7:5 8:2 9:4 10:3 11:3 12:2 13:3 14:6.5 15:4 16:4 17:47 18:7.5 19:3 20:0 +-1 0:1 1:1 2:38.1 3:84 4:44 5:4 6:0 7:4 8:2 9:5 10:3 11:1 12:1 13:3 14:5 15:0 16:4 17:60 18:6.8 19:0 20:5.7 +1 0:2 1:1 2:38.7 3:52 4:0 5:1 6:1 7:1 8:1 9:1 10:3 11:1 12:0 13:0 14:0 15:1 16:3 17:4 18:74 19:0 20:0 +1 0:2 1:1 2:38.1 3:44 4:40 5:2 6:1 7:3 8:1 9:3 10:3 11:1 12:0 13:0 14:0 15:1 16:3 17:35 18:6.8 19:0 20:0 +1 0:2 1:1 2:38.4 3:52 4:20 5:2 6:1 7:3 8:1 9:1 10:3 11:2 12:2 13:1 14:0 15:3 16:5 17:41 18:63 19:1 20:1 +1 0:1 1:1 2:38.2 3:60 
4:0 5:1 6:0 7:3 8:1 9:2 10:1 11:1 12:1 13:1 14:0 15:4 16:4 17:43 18:6.2 19:2 20:3.9 +1 0:2 1:1 2:37.7 3:40 4:18 5:1 6:1 7:1 8:0 9:3 10:2 11:1 12:1 13:1 14:0 15:3 16:3 17:36 18:3.5 19:0 20:0 +1 0:1 1:1 2:39.1 3:60 4:10 5:0 6:1 7:1 8:0 9:2 10:3 11:0 12:0 13:0 14:0 15:4 16:4 17:0 18:0 19:0 20:0 +1 0:2 1:1 2:37.8 3:48 4:16 5:1 6:1 7:1 8:1 9:0 10:1 11:1 12:2 13:1 14:0 15:4 16:3 17:43 18:7.5 19:0 20:0 +1 0:1 1:1 2:39 3:120 4:0 5:4 6:3 7:5 8:2 9:2 10:4 11:3 12:2 13:3 14:8 15:0 16:0 17:65 18:8.199999999999999 19:3 20:4.6 +1 0:1 1:1 2:38.2 3:76 4:0 5:2 6:3 7:2 8:1 9:5 10:3 11:3 12:1 13:2 14:6 15:1 16:5 17:35 18:6.5 19:2 20:0.9 +-1 0:2 1:1 2:38.3 3:88 4:0 5:0 6:0 7:6 8:0 9:0 10:0 11:0 12:0 13:0 14:0 15:0 16:0 17:0 18:0 19:0 20:0 +1 0:1 1:1 2:38 3:80 4:30 5:3 6:3 7:3 8:1 9:0 10:0 11:0 12:0 13:0 14:6 15:0 16:0 17:48 18:8.300000000000001 19:0 20:4.3 +-1 0:1 1:1 2:0 3:0 4:0 5:3 6:1 7:1 8:1 9:2 10:3 11:3 12:1 13:3 14:6 15:4 16:4 17:0 18:0 19:2 20:0 +1 0:1 1:1 2:37.6 3:40 4:0 5:1 6:1 7:1 8:1 9:1 10:1 11:1 12:0 13:0 14:0 15:1 16:1 17:0 18:0 19:2 20:2.1 +1 0:2 1:1 2:37.5 3:44 4:0 5:1 6:1 7:1 8:1 9:3 10:3 11:2 12:0 13:0 14:0 15:0 16:0 17:45 18:5.8 19:2 20:1.4 +1 0:2 1:1 2:38.2 3:42 4:16 5:1 6:1 7:3 8:1 9:1 10:3 11:1 12:0 13:0 14:0 15:1 16:0 17:35 18:60 19:1 20:1 +1 0:2 1:1 2:38 3:56 4:44 5:3 6:3 7:3 8:0 9:0 10:1 11:1 12:2 13:1 14:0 15:4 16:0 17:47 18:70 19:2 20:1 +1 0:2 1:1 2:38.3 3:45 4:20 5:3 6:3 7:2 8:2 9:2 10:4 11:1 12:2 13:0 14:0 15:4 16:0 17:0 18:0 19:0 20:0 +1 0:1 1:1 2:0 3:48 4:96 5:1 6:1 7:3 8:1 9:0 10:4 11:1 12:2 13:1 14:0 15:1 16:4 17:42 18:8 19:1 20:0 +1 0:1 1:1 2:37.7 3:55 4:28 5:2 6:1 7:2 8:1 9:2 10:3 11:3 12:0 13:3 14:5 15:4 16:5 17:0 18:0 19:0 20:0 +-1 0:2 1:1 2:36 3:100 4:20 5:4 6:3 7:6 8:2 9:2 10:4 11:3 12:1 13:1 14:0 15:4 16:5 17:74 18:5.7 19:2 20:2.5 +1 0:1 1:1 2:37.1 3:60 4:20 5:2 6:0 7:4 8:1 9:3 10:0 11:3 12:0 13:2 14:5 15:3 16:4 17:64 18:8.5 19:2 20:0 +1 0:2 1:1 2:37.1 3:114 4:40 5:3 6:0 7:3 8:2 9:2 10:2 11:1 12:0 13:0 14:0 15:0 16:3 17:32 18:0 19:3 20:6.5 +1 
0:1 1:1 2:38.1 3:72 4:30 5:3 6:3 7:3 8:1 9:4 10:4 11:3 12:2 13:1 14:0 15:3 16:5 17:37 18:56 19:3 20:1 +1 0:1 1:1 2:37 3:44 4:12 5:3 6:1 7:1 8:2 9:1 10:1 11:1 12:0 13:0 14:0 15:4 16:2 17:40 18:6.7 19:3 20:8 +1 0:1 1:1 2:38.6 3:48 4:20 5:3 6:1 7:1 8:1 9:4 10:3 11:1 12:0 13:0 14:0 15:3 16:0 17:37 18:75 19:0 20:0 +-1 0:1 1:1 2:0 3:82 4:72 5:3 6:1 7:4 8:1 9:2 10:3 11:3 12:0 13:3 14:0 15:4 16:4 17:53 18:65 19:3 20:2 +-1 0:1 1:9 2:38.2 3:78 4:60 5:4 6:4 7:6 8:0 9:3 10:3 11:3 12:0 13:0 14:0 15:1 16:0 17:59 18:5.8 19:3 20:3.1 +-1 0:2 1:1 2:37.8 3:60 4:16 5:1 6:1 7:3 8:1 9:2 10:3 11:2 12:1 13:2 14:0 15:3 16:0 17:41 18:73 19:0 20:0 +-1 0:1 1:1 2:38.7 3:34 4:30 5:2 6:0 7:3 8:1 9:2 10:3 11:0 12:0 13:0 14:0 15:0 16:0 17:33 18:69 19:0 20:2 +1 0:1 1:1 2:0 3:36 4:12 5:1 6:1 7:1 8:1 9:1 10:2 11:1 12:1 13:1 14:0 15:1 16:5 17:44 18:0 19:0 20:0 +1 0:2 1:1 2:38.3 3:44 4:60 5:0 6:0 7:1 8:1 9:0 10:0 11:0 12:0 13:0 14:0 15:0 16:0 17:6.4 18:36 19:0 20:0 +1 0:2 1:1 2:37.4 3:54 4:18 5:3 6:0 7:1 8:1 9:3 10:4 11:3 12:2 13:2 14:0 15:4 16:5 17:30 18:7.1 19:2 20:0 +1 0:1 1:1 2:0 3:0 4:0 5:4 6:3 7:0 8:2 9:2 10:4 11:1 12:0 13:0 14:0 15:0 16:0 17:54 18:76 19:3 20:2 +-1 0:1 1:1 2:36.6 3:48 4:16 5:3 6:1 7:3 8:1 9:4 10:1 11:1 12:1 13:1 14:0 15:0 16:0 17:27 18:56 19:0 20:0 +1 0:1 1:1 2:38.5 3:90 4:0 5:1 6:1 7:3 8:1 9:3 10:3 11:3 12:2 13:3 14:2 15:4 16:5 17:47 18:79 19:0 20:0 +1 0:1 1:1 2:0 3:75 4:12 5:1 6:1 7:4 8:1 9:5 10:3 11:3 12:0 13:3 14:5.8 15:0 16:0 17:58 18:8.5 19:1 20:0 +1 0:2 1:1 2:38.2 3:42 4:0 5:3 6:1 7:1 8:1 9:1 10:1 11:2 12:2 13:1 14:0 15:3 16:2 17:35 18:5.9 19:2 20:0 +-1 0:1 1:9 2:38.2 3:78 4:60 5:4 6:4 7:6 8:0 9:3 10:3 11:3 12:0 13:0 14:0 15:1 16:0 17:59 18:5.8 19:3 20:3.1 +1 0:2 1:1 2:38.6 3:60 4:30 5:1 6:1 7:3 8:1 9:4 10:2 11:2 12:1 13:1 14:0 15:0 16:0 17:40 18:6 19:1 20:0 +1 0:2 1:1 2:37.8 3:42 4:40 5:1 6:1 7:1 8:1 9:1 10:3 11:1 12:0 13:0 14:0 15:3 16:3 17:36 18:6.2 19:0 20:0 +-1 0:1 1:1 2:38 3:60 4:12 5:1 6:1 7:2 8:1 9:2 10:1 11:1 12:1 13:1 14:0 15:1 16:4 17:44 18:65 19:3 20:2 +1 0:2 
1:1 2:38 3:42 4:12 5:3 6:0 7:3 8:1 9:1 10:1 11:1 12:0 13:0 14:0 15:0 16:1 17:37 18:5.8 19:0 20:0 +-1 0:2 1:1 2:37.6 3:88 4:36 5:3 6:1 7:1 8:1 9:3 10:3 11:2 12:1 13:3 14:1.5 15:0 16:0 17:44 18:6 19:0 20:0 diff --git a/src/python/7.AdaBoost/sklearn-adaboost-demo.py b/src/python/7.AdaBoost/sklearn-adaboost-demo.py index c685d9c9..4d80b045 100644 --- a/src/python/7.AdaBoost/sklearn-adaboost-demo.py +++ b/src/python/7.AdaBoost/sklearn-adaboost-demo.py @@ -1,6 +1,5 @@ #!/usr/bin/python # coding:utf8 - """ Created on 2017-07-10 Updated on 2017-07-10 @@ -8,80 +7,55 @@ Updated on 2017-07-10 《机器学习实战》更新地址:https://github.com/apachecn/MachineLearning sklearn-AdaBoost译文链接: http://cwiki.apachecn.org/pages/viewpage.action?pageId=10813457 """ + import matplotlib.pyplot as plt +# importing necessary libraries import numpy as np -from sklearn.datasets import make_gaussian_quantiles -from sklearn.ensemble import AdaBoostClassifier -from sklearn.tree import DecisionTreeClassifier +from sklearn import metrics +from sklearn.ensemble import AdaBoostRegressor +from sklearn.tree import DecisionTreeRegressor print(__doc__) -# Construct dataset -X1, y1 = make_gaussian_quantiles(cov=2., - n_samples=200, n_features=2, - n_classes=2, random_state=1) -X2, y2 = make_gaussian_quantiles(mean=(3, 3), cov=1.5, - n_samples=300, n_features=2, - n_classes=2, random_state=1) -X = np.concatenate((X1, X2)) -y = np.concatenate((y1, - y2 + 1)) -# Create and fit an AdaBoosted decision tree -bdt = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), - algorithm="SAMME", - n_estimators=200) +# Create the dataset +rng = np.random.RandomState(1) +X = np.linspace(0, 6, 100)[:, np.newaxis] +y = np.sin(X).ravel() + np.sin(6 * X).ravel() + rng.normal(0, 0.1, X.shape[0]) +# dataArr, labelArr = loadDataSet("input/7.AdaBoost/horseColicTraining2.txt") -bdt.fit(X, y) -plot_colors = "br" -plot_step = 0.02 -class_names = "AB" +# Fit regression model +regr_1 = DecisionTreeRegressor(max_depth=4) +regr_2 = 
AdaBoostRegressor(DecisionTreeRegressor(max_depth=4), n_estimators=300, random_state=rng) -plt.figure(figsize=(10, 5)) +regr_1.fit(X, y) +regr_2.fit(X, y) -# Plot the decision boundaries -plt.subplot(121) -x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1 -y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1 -xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step), - np.arange(y_min, y_max, plot_step)) +# Predict +y_1 = regr_1.predict(X) +y_2 = regr_2.predict(X) -Z = bdt.predict(np.c_[xx.ravel(), yy.ravel()]) -Z = Z.reshape(xx.shape) -cs = plt.contourf(xx, yy, Z, cmap=plt.cm.Paired) -plt.axis("tight") - -# Plot the training points -for i, n, c in zip(range(2), class_names, plot_colors): - idx = np.where(y == i) - plt.scatter(X[idx, 0], X[idx, 1], - c=c, cmap=plt.cm.Paired, - label="Class %s" % n) -plt.xlim(x_min, x_max) -plt.ylim(y_min, y_max) -plt.legend(loc='upper right') -plt.xlabel('x') -plt.ylabel('y') -plt.title('Decision Boundary') - -# Plot the two-class decision scores -twoclass_output = bdt.decision_function(X) -plot_range = (twoclass_output.min(), twoclass_output.max()) -plt.subplot(122) -for i, n, c in zip(range(2), class_names, plot_colors): - plt.hist(twoclass_output[y == i], - bins=10, - range=plot_range, - facecolor=c, - label='Class %s' % n, - alpha=.5) -x1, x2, y1, y2 = plt.axis() -plt.axis((x1, x2, y1, y2 * 1.2)) -plt.legend(loc='upper right') -plt.ylabel('Samples') -plt.xlabel('Score') -plt.title('Decision Scores') - -plt.tight_layout() -plt.subplots_adjust(wspace=0.35) +# Plot the results +plt.figure() +plt.scatter(X, y, c="k", label="training samples") +plt.plot(X, y_1, c="g", label="n_estimators=1", linewidth=2) +plt.plot(X, y_2, c="r", label="n_estimators=300", linewidth=2) +plt.xlabel("data") +plt.ylabel("target") +plt.title("Boosted Decision Tree Regression") +plt.legend() plt.show() + +print 'y---', type(y[0]), len(y), y[:4] +print 'y_1---', type(y_1[0]), len(y_1), y_1[:4] +print 'y_2---', type(y_2[0]), len(y_2), y_2[:4] + +# 适合2分类 
+y_true = np.array([0, 0, 1, 1]) +y_scores = np.array([0.1, 0.4, 0.35, 0.8]) +print 'y_scores---', type(y_scores[0]), len(y_scores), y_scores +print metrics.roc_auc_score(y_true, y_scores) + +# print "-" * 100 +# print metrics.roc_auc_score(y[:1], y_2[:1]) diff --git a/tools/python2libsvm.py b/tools/python2libsvm.py new file mode 100644 index 00000000..5e75c7b9 --- /dev/null +++ b/tools/python2libsvm.py @@ -0,0 +1,52 @@ +#!/usr/bin/python +# coding:utf8 + +import os +import sklearn.datasets as datasets + + +def get_data(file_input, separator='\t'): + if 'libsvm' not in file_input: + file_input = other2libsvm(file_input, separator) + data = datasets.load_svmlight_file(file_input) + return data[0], data[1] + + +def other2libsvm(file_name, separator='\t'): + + libsvm_name = file_name.replace('.txt', '.libsvm_tmp') + libsvm_data = open(libsvm_name, 'w') + + file_data = open(file_name, 'r') + for line in file_data.readlines(): + features = line.strip().split(separator) + # print len(features) + class_data = features[-1] + svm_format = '' + for i in range(len(features)-1): + svm_format += " %d:%s" % (i+1, features[i]) + # print svm_format + svm_format = "%s%s\n" % (class_data, svm_format) + # print svm_format + libsvm_data.write(svm_format) + file_data.close() + + libsvm_data.close() + return libsvm_name + + +def dump_data(x, y, file_output): + datasets.dump_svmlight_file(x, y, file_output) + os.remove("%s_tmp" % file_output) + + +if __name__ == "__main__": + file_input = "input/7.AdaBoost/horseColicTest2.txt" + file_output = "input/7.AdaBoost/horseColicTest2.libsvm" + + # 获取数据集 + x, y = get_data(file_input, separator='\t') + print x[3, :] + print y + # 导出数据为 libsvm + dump_data(x, y, file_output)