diff --git a/AutoBangumi/app/RssFliter/RSSFliter.py b/AutoBangumi/app/RSSFilter.py similarity index 97% rename from AutoBangumi/app/RssFliter/RSSFliter.py rename to AutoBangumi/app/RSSFilter.py index 364405a6..ff33741c 100644 --- a/AutoBangumi/app/RssFliter/RSSFliter.py +++ b/AutoBangumi/app/RSSFilter.py @@ -41,7 +41,7 @@ class RSSInfoCleaner: self.clean() # 清理广告等杂质 # 加载日志,匹配特征等 logging.basicConfig(level=logging.DEBUG, - filename='./rename_log.txt', + filename='RssFilter/rename_log.txt', filemode='w', format='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s') self.group_character = ['字幕社', '字幕组', '字幕屋', '发布组', '动漫', '国漫', '汉化', 'raw', 'works', '工作室', '压制', '合成', '制作', @@ -49,7 +49,7 @@ class RSSInfoCleaner: self.group_char = ['dmhy', '澄空学园', 'c.c动漫', "vcb", 'amor', 'moozzi2', 'skytree', 'sweetsub', 'pcsub', 'ahu-sub', 'f宅', 'captions', 'dragsterps', 'onestar', "lolihouse", "天空树", '卡通', '时雨初空', 'nyaa', 'ddd', 'koten', 'reinforce', '届恋对邦小队', 'cxraw'] - with open("rule.json", encoding='utf-8') as file_obj: + with open("../config/clean_rule.json", encoding='utf-8') as file_obj: rule_json = json.load(file_obj)[0]["group_name"] self.group_rule = [zhconv.convert(x, 'zh-cn') for x in rule_json] self.file_info = {} @@ -250,7 +250,7 @@ class RSSInfoCleaner: for _ in range(3): try: res = re.search( - "[((\[【]?((bd|remux|(viu)?tvb?|bilibili|b ?global|baha|web[ -]?(dl|rip))[ -]?(iso|mut|rip)?)[))\]】]?", + "[((\[【]?((bd|BD-BOX|bd-b0x|psv&pc|remux|(viu)?tvb?|bilibili|b ?global|baha|web[ -]?(dl|rip))[ -]?(iso|mut|rip)?)[))\]】]?", file_name).group(1).lower().strip(" ") if res not in type_list: type_list.append(res) @@ -582,6 +582,8 @@ class RSSInfoCleaner: self.easy_split(temp_name, zh_list, en_list, jp_list) elif zh_list == [] and en_list == []: self.extract_title(clean_name) + while "" in en_list: + en_list.remove("") self.Name.zh = zh_list if zh_list else None self.Name.en = en_list if en_list else None @@ -596,12 +598,12 @@ if __name__ == "__main__": def read_data(file_name, rows): if file_name == "mikan": - with open('mikan.csv', 'r', encoding='utf-8') as csv_file: + with open('RssFilter/mikan.csv', 'r', encoding='utf-8') as csv_file: reader = csv.reader(csv_file) raw_data = [row[3] for row in reader][0:rows] return raw_data elif file_name == "dmhy": - with open('dmhy.csv', 'r', encoding='utf-8') as csv_file: + with open('RssFilter/dmhy.csv', 'r', encoding='utf-8') as csv_file: reader = csv.reader(csv_file) raw_data = [row[4] for row in reader][1:rows + 1] return raw_data diff --git a/AutoBangumi/app/RssFliter/dmhy.csv b/AutoBangumi/app/RssFilter/dmhy.csv similarity index 100% rename from AutoBangumi/app/RssFliter/dmhy.csv rename to AutoBangumi/app/RssFilter/dmhy.csv diff --git a/AutoBangumi/app/RssFliter/mikan.csv b/AutoBangumi/app/RssFilter/mikan.csv similarity index 100% rename from AutoBangumi/app/RssFliter/mikan.csv rename to AutoBangumi/app/RssFilter/mikan.csv diff --git a/AutoBangumi/app/collect_bangumi_info.py b/AutoBangumi/app/collect_bangumi_info.py index 13664583..5f13f1e4 100644 --- a/AutoBangumi/app/collect_bangumi_info.py +++ b/AutoBangumi/app/collect_bangumi_info.py @@ -5,7 +5,7 @@ from bs4 import BeautifulSoup import json import re from env import EnvInfo, BColors -from AutoBangumi.app.RssFliter.RSSFliter import RSSInfoCleaner as Cleaner +from RSSFilter import RSSInfoCleaner as Filter class MatchRule: split_rule = r"\[|\]|\【|\】|\★|\(|\)|\(|\)" @@ -124,19 +124,8 @@ if __name__ == "__main__": for item in items: name = item.title.string print(BColors.HEADER + name + BColors.OKGREEN) - pn = Cleaner(name).Name - if pn.en is not None: - if type(pn.en) is list: - for n in pn.en: - print(n) - else: - print(pn.en) - else: - if type(pn.zh) is list: - for n in pn.zh: - print(n) - else: - print(pn.zh) + pn = Filter(name).Name + print(pn.clean) # print(BColors.HEADER + name) # print(BColors.OKGREEN + str(pn.Name.en)) \ No newline at end of file diff --git a/AutoBangumi/app/env.py b/AutoBangumi/app/env.py index 170413d5..527ef7c0 100644 --- a/AutoBangumi/app/env.py +++ b/AutoBangumi/app/env.py @@ -15,7 +15,7 @@ class EnvInfo: method = os.environ["METHOD"] enable_group_tag = os.getenv("GROUP_TAG", 'False').lower() in ('true', '1', 't') info_path = "/config/bangumi.json" - rule_path = "/config/rule.json" + rule_path = "/config/clean_rule.json" not_contain = os.environ["NOT_CONTAIN"] get_rule_debug = os.getenv("RULE_DEBUG", 'False').lower() in ('true', '1', 't') else: diff --git a/AutoBangumi/app/RssFliter/rule.json b/AutoBangumi/config/clean_rule.json similarity index 100% rename from AutoBangumi/app/RssFliter/rule.json rename to AutoBangumi/config/clean_rule.json