From 5a2eb7ed4dc6e499c3cf6f45242809ef01f446d9 Mon Sep 17 00:00:00 2001 From: EstrellaXD Date: Fri, 27 May 2022 23:53:21 +0800 Subject: [PATCH] 2.4beta --- .idea/Bangumi_Auto_Rename.iml | 2 +- .idea/misc.xml | 2 +- .../app/RssFliter/RSSFliter.py | 33 ++----------------- .../app/RssFliter}/rule.json | 0 AutoBangumi/app/collect_bangumi_info.py | 29 +++++++++++++--- AutoBangumi/app/env.py | 14 +++++++- 6 files changed, 41 insertions(+), 39 deletions(-) rename Windows/rename/rename.py => AutoBangumi/app/RssFliter/RSSFliter.py (95%) rename {Windows/rename => AutoBangumi/app/RssFliter}/rule.json (100%) diff --git a/.idea/Bangumi_Auto_Rename.iml b/.idea/Bangumi_Auto_Rename.iml index d0876a78..8437fe66 100644 --- a/.idea/Bangumi_Auto_Rename.iml +++ b/.idea/Bangumi_Auto_Rename.iml @@ -2,7 +2,7 @@ - + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index 2d83d70f..dc9ea490 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,4 +1,4 @@ - + \ No newline at end of file diff --git a/Windows/rename/rename.py b/AutoBangumi/app/RssFliter/RSSFliter.py similarity index 95% rename from Windows/rename/rename.py rename to AutoBangumi/app/RssFliter/RSSFliter.py index c85624d3..3a85fe26 100644 --- a/Windows/rename/rename.py +++ b/AutoBangumi/app/RssFliter/RSSFliter.py @@ -1,28 +1,10 @@ import re -import csv import json -import time - import zhconv -import requests import logging -import pandas as pd -def read_data(name, rows): - if name == "mikan": - with open('mikan.csv', 'r', encoding='utf-8') as csv_file: - reader = csv.reader(csv_file) - raw_data = [row[3] for row in reader][0:rows] - return raw_data - elif name == "dmhy": - with open('dmhy.csv', 'r', encoding='utf-8') as csv_file: - reader = csv.reader(csv_file) - raw_data = [row[4] for row in reader][1:rows + 1] - return raw_data - - -class Rename: +class RSSInfoCleaner: class Name: raw_name = None group = None @@ -451,14 +433,12 @@ class Rename: try: res = re.search("(^[a\u4e00-\u9fa5: ]{1,10} ?)([a-z:]{1,20} ?){1,10}", clean_name).group(1) clean_name = clean_name.replace(res, res.strip(" ") + "/") - print("zh_pre:%s" % clean_name) except Exception as e: logging.info(e) else: try: res = re.search("^(([a-z:]{1,20} ?){1,10} )[\u4e00-\u9fa5: a]{1,20}", clean_name).group(1) clean_name = clean_name.replace(res, res.strip(" ") + "/") - print("en_pre:%s" % clean_name) except Exception as e: logging.info(e) except Exception as e: @@ -502,6 +482,7 @@ class Rename: # 字母全部小写 clean_name = self.Name.file_name.lower() + # clean_name = self.Name.file_name # 去除拿到的有效信息 for k, v in info.items(): if v is not None: @@ -525,7 +506,6 @@ class Rename: clean_name = re.sub('[^a-zA-Z\u4e00-\u9fa5:@#$%^&*()\[\]/ ]', "", clean_name) clean_name = re.sub(' +', ' ', clean_name).strip(" ") clean_name = re.sub("([(\[] *| *[)\]])", "", clean_name) - print(clean_name) zh_list = [] en_list = [] @@ -540,12 +520,3 @@ class Rename: return info -if __name__ == "__main__": - # mikan/dmhy 获取数据,dmhy 最多1w行,mikan最多3w行 - name_list = read_data("dmhy", 1000) - start = time.time() - for name in name_list: - print(name) - print(Rename(name).Name.zh) - print() - print("%s" % (time.time() - start)) diff --git a/Windows/rename/rule.json b/AutoBangumi/app/RssFliter/rule.json similarity index 100% rename from Windows/rename/rule.json rename to AutoBangumi/app/RssFliter/rule.json diff --git a/AutoBangumi/app/collect_bangumi_info.py b/AutoBangumi/app/collect_bangumi_info.py index 23ce19f1..13664583 100644 --- a/AutoBangumi/app/collect_bangumi_info.py +++ b/AutoBangumi/app/collect_bangumi_info.py @@ -4,8 +4,8 @@ import requests from bs4 import BeautifulSoup import json import re -from env import EnvInfo - +from env import EnvInfo, BColors +from AutoBangumi.app.RssFliter.RSSFliter import RSSInfoCleaner as Cleaner class MatchRule: split_rule = r"\[|\]|\【|\】|\★|\(|\)|\(|\)" @@ -118,6 +118,25 @@ class CollectRSS: if __name__ == "__main__": - cr = CollectRSS() - cr.get_info_list() - cr.put_info_json() \ No newline at end of file + rss = requests.get(EnvInfo.rss_link, 'utf-8') + soup = BeautifulSoup(rss.text, 'xml') + items = soup.find_all('item') + for item in items: + name = item.title.string + print(BColors.HEADER + name + BColors.OKGREEN) + pn = Cleaner(name).Name + if pn.en is not None: + if type(pn.en) is list: + for n in pn.en: + print(n) + else: + print(pn.en) + else: + if type(pn.zh) is list: + for n in pn.zh: + print(n) + else: + print(pn.zh) + + # print(BColors.HEADER + name) + # print(BColors.OKGREEN + str(pn.Name.en)) \ No newline at end of file diff --git a/AutoBangumi/app/env.py b/AutoBangumi/app/env.py index 7d6c0303..170413d5 100644 --- a/AutoBangumi/app/env.py +++ b/AutoBangumi/app/env.py @@ -3,7 +3,7 @@ import time class EnvInfo: - debug_mode = False + debug_mode = True # Docker Env if not debug_mode: host_ip = os.environ["HOST"] @@ -36,3 +36,15 @@ class EnvInfo: rule_url = "https://raw.githubusercontent.com/EstrellaXD/Bangumi_Auto_Collector/main/AutoBangumi/config/rule.json" time_show_obj = time.strftime('%Y-%m-%d %X') rule_name_re = r"\:|\/|\." + + +class BColors: + HEADER = '\033[95m' + OKBLUE = '\033[94m' + OKCYAN = '\033[96m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m'