This commit is contained in:
Sean
2022-06-03 14:43:13 +08:00
parent 32a82e5c4d
commit 6c99ba41df
23 changed files with 10452 additions and 351 deletions

22
.vscode/launch.json vendored
View File

@@ -5,16 +5,7 @@
"version": "0.2.0",
"configurations": [
{
"name": "Python: 当前文件",
"type": "python",
"request": "launch",
"program": "${file}",
"cwd": "${workspaceFolder}/AutoBangumi/app",
"console": "integratedTerminal",
"justMyCode": true
},
{
"name": "Python: docker_main",
"name": "Python: app",
"type": "python",
"request": "launch",
"program": "${workspaceFolder}/AutoBangumi/app/app.py",
@@ -24,6 +15,15 @@
"cwd": "${workspaceFolder}/AutoBangumi/app",
"console": "integratedTerminal",
"justMyCode": true
}
},
{
"name": "Python: 当前文件",
"type": "python",
"request": "launch",
"program": "${file}",
"cwd": "${workspaceFolder}/AutoBangumi/app",
"console": "integratedTerminal",
"justMyCode": true
}
]
}

View File

@@ -1,34 +1,60 @@
from ast import arg
import os
import time
import logging
from collect_info import CollectRSS
from set_rule import SetRule
from rename_qb import qBittorrentRename
from conf import settings
from argument_parser import parse
from log import setup_logger
from utils import json_config
def create_data_file():
    """Create the bangumi info JSON file with an empty skeleton, if absent."""
    if not os.path.exists(settings.info_path):
        # Skeleton: no RSS link recorded yet, no collected bangumi.
        bangumi_info = {"rss_link": "", "bangumi_info": []}
        json_config.save(settings.info_path, bangumi_info)
from core.rss_collector import RSSCollector
from core.download_client import DownloadClient
from core.renamer import Renamer
if __name__ == "__main__":
logger = logging.getLogger(__name__)
def load_data_file():
    """Load the persisted bangumi data, or an empty skeleton when the
    info file does not exist yet."""
    info_path = settings.info_path
    if os.path.exists(info_path):
        return json_config.load(info_path)
    return {"rss_link": "", "bangumi_info": []}
def save_data_file(bangumi_data):
    """Persist the in-memory bangumi data to the configured info file."""
    json_config.save(settings.info_path, bangumi_data)
def run():
args = parse()
if args.debug:
from const_dev import DEV_SETTINGS
settings.init(DEV_SETTINGS)
else:
settings.init()
setup_logger()
create_data_file()
SetRule().rss_feed()
bangumi_data = load_data_file()
download_client = DownloadClient()
download_client.rss_feed()
rss_collector = RSSCollector()
renamer = Renamer(download_client)
while True:
CollectRSS().run()
SetRule().run()
qBittorrentRename().run()
time.sleep(settings.sleep_time)
try:
rss_collector.collect(bangumi_data)
download_client.add_rules(bangumi_data["bangumi_info"])
renamer.run()
save_data_file(bangumi_data)
time.sleep(settings.sleep_time)
except Exception as e:
if args.debug:
raise e
logger.exception(e)
if __name__ == "__main__":
run()

View File

@@ -0,0 +1,6 @@
from bangumi_parser.episode import Episode
class Analyser():
    """Token-based analyser stage of the parsing pipeline.

    NOTE(review): currently a stub — it ignores its inputs and returns an
    empty Episode; the token-driven analysis is not implemented yet.
    """

    def analyse(self,name, tokens) -> Episode:
        """Return an Episode derived from *name* and its *tokens* (stub)."""
        episode = Episode()
        return episode

View File

@@ -0,0 +1,90 @@
import re
import logging
import requests
from conf import settings
from utils import json_config
from bangumi_parser.episode import Episode
logger = logging.getLogger(__name__)
class MatchRule:
    """Regex fragments used by SimpleAnalyser to split a release name and
    extract title/season information."""

    # Characters that delimit segments in a release name (brackets, stars, pipes).
    split_rule = r"\[|\]|\【|\】|\★|\|\|\(|\)"
    # Captures everything before a trailing " -" marker.
    last_rule = r"(.*)( \-)"
    # Non-ASCII runs / stray digits stripped when deriving a candidate title.
    # NOTE(review): the "^" inside " \d{1,2}^.*" looks misplaced — confirm intent.
    sub_title = r"[^\x00-\xff]{1,}| \d{1,2}^.*|\·"
    # "S01 ..." style: group(2) is the text after the season tag.
    match_rule = r"(S\d{1,2}(.*))"
    # Splits "<title> <season marker>" (Season n / Sn / 第n季 / 第n期).
    season_match = r"(.*)(Season \d{1,2}|S\d{1,2}|第.*季|第.*期)"
    # First run of digits inside a season marker.
    season_number_match = r"(\d+)"
# Simple is often best.
class SimpleAnalyser:
    """Rule-driven analyser: matches a raw release name against the remote
    rule set and extracts title, group and season into an Episode."""

    def __init__(self) -> None:
        # The cached rule file is the fallback; prefer a fresh copy from
        # rule_url and re-cache it on success.
        self.rules = json_config.load(settings.rule_path)
        try:
            self.rules = requests.get(settings.rule_url).json()
            json_config.save(settings.rule_path, self.rules)
        except Exception as e:
            logger.exception(e)

    def analyse(self, name) -> Episode:
        """Return an Episode parsed from *name*, or None if no rule matches."""
        flag = False
        for rule in self.rules:
            for group in rule["group_name"]:
                if re.search(group, name):
                    # Split on bracket-like delimiters and drop empty segments.
                    n = re.split(MatchRule.split_rule, name)
                    while "" in n:
                        n.remove("")
                    while " " in n:
                        n.remove(" ")
                    try:
                        title = n[rule["name_position"]].strip()
                    except IndexError:
                        continue
                    # Prefer the longest ASCII-ish fragment as the title.
                    sub_title = re.sub(MatchRule.sub_title, "", title)
                    b = re.split(r"\/|\_", sub_title)
                    while "" in b:
                        b.remove("")
                    pre_name = max(b, key=len, default="").strip()
                    if len(pre_name.encode()) > 3:
                        title = pre_name
                    # Apply the trailing-dash / season-prefix trims twice,
                    # since one pass can expose another trailing marker.
                    for i in range(2):
                        match_obj = re.match(MatchRule.last_rule, title, re.I)
                        if match_obj is not None:
                            title = match_obj.group(1).strip()
                        match_obj = re.match(MatchRule.match_rule, title, re.I)
                        if match_obj is not None:
                            title = match_obj.group(2).strip()
                    flag = True
                    break
            if flag:
                break
        if not flag:
            # BUG FIX: the f-prefix was missing, so the literal "{name}"
            # placeholder was logged.
            logger.debug(f"ERROR Not match with {name}")
            return
        match_title_season = re.match(MatchRule.season_match, title, re.I)
        if match_title_season is not None:
            title = match_title_season.group(1).strip()
            season = match_title_season.group(2)
            match_season_number = re.findall(MatchRule.season_number_match, season)
            try:
                season_number = int(match_season_number[0])
            except (IndexError, ValueError):
                # BUG FIX: this default used to sit in a `finally` block,
                # which always runs and overwrote every successfully parsed
                # season number with 1.
                logger.warning(
                    f"title:{title} season:{season} can't match season in number"
                )
                season_number = 1
        else:
            season = "S01"
            season_number = 1
        episode = Episode()
        episode.title = title
        episode.group = group
        episode.season_info.raw = season
        episode.season_info.number = season_number
        return episode

View File

@@ -0,0 +1,38 @@
from dataclasses import dataclass, field
from typing import Optional


@dataclass
class Episode:
    """Structured metadata parsed from a raw release name.

    Every field defaults to "unknown" (None / empty sub-record) so an
    analyser can fill values in incrementally.
    """

    @dataclass
    class TitleInfo:
        # raw: title text exactly as found; name: cleaned title.
        raw: Optional[str] = None
        name: Optional[str] = None

    @dataclass
    class SeasonInfo:
        # raw: season marker as matched (e.g. "S02"); number: parsed integer.
        raw: Optional[str] = None
        number: Optional[int] = None

    @dataclass
    class NumberInfo:
        # raw: episode-number text as matched; number: parsed integer.
        raw: Optional[str] = None
        number: Optional[int] = None

    # BUG FIX: these are now real dataclass fields. Previously the classes
    # carried @dataclass but defined their own no-field __init__, so the
    # generated __eq__ compared nothing and ALL Episode instances were equal.
    group: Optional[str] = None
    title_info: TitleInfo = field(default_factory=TitleInfo)
    season_info: SeasonInfo = field(default_factory=SeasonInfo)
    number_info: NumberInfo = field(default_factory=NumberInfo)
    format: Optional[str] = None
    subtitle: Optional[str] = None

    @property
    def title(self) -> Optional[str]:
        """Shortcut for title_info.name."""
        return self.title_info.name

    @title.setter
    def title(self, title: str):
        self.title_info.name = title

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,39 @@
import logging
logger = logging.getLogger(__name__)
from preprocessor import Preprocessor
from token_generator import TokenGenerator
from analyser import Analyser
class Parser:
    """Pipeline facade: preprocess, tokenize, then analyse a raw release name."""

    def __init__(self) -> None:
        self._preprocessor = Preprocessor()
        self._token_generator = TokenGenerator()
        self._analyser = Analyser()

    def parse(self, name: str):
        """Run *name* through the pipeline.

        Returns a (episode, tokens, normalized_name) tuple; note the
        returned name is the preprocessed form, not the raw input.
        """
        name = self._preprocessor.preprocess(name)
        tokens = self._token_generator.generate(name)
        episode = self._analyser.analyse(name, tokens)
        return episode, tokens, name
if __name__ == "__main__":
    # Ad-hoc smoke test: parse sample names from parser/names.txt and log
    # only the ones that tokenize into a single token (likely parser misses).
    import sys, os
    sys.path.append(os.path.dirname(".."))
    from log import setup_logger
    from const import BCOLORS
    setup_logger()
    parser = Parser()
    # NOTE(review): parenthesized `with (... as f):` requires Python >= 3.10.
    with (open("parser/names.txt", "r", encoding="utf-8") as f):
        for name in f:
            # NOTE(review): lines from file iteration keep their trailing
            # newline, so `name != ""` is always true here.
            if name != "":
                episode, tokens, name = parser.parse(name)
                if len(tokens) == 1:
                    logger.debug(f"{BCOLORS._(BCOLORS.HEADER, name)}")
                    logger.debug(f"{BCOLORS._(BCOLORS.OKGREEN,tokens)}")
                    logger.debug(f"{BCOLORS._(BCOLORS.WARNING,episode)}")

View File

@@ -0,0 +1,12 @@
from preprocessor.char_standardize import CharStandardize
class Preprocessor:
    """Runs a release name through the configured normalization stages."""

    def __init__(self) -> None:
        # Stage order matters: each stage feeds the next.
        self._preprocessors = [CharStandardize()]

    def preprocess(self, s: str) -> str:
        """Apply every stage in sequence and return the normalized string."""
        for stage in self._preprocessors:
            s = stage.preprocess(s)
        return s

View File

@@ -0,0 +1,24 @@
# Mapping of full-width / variant characters to their ASCII equivalents.
# BUG FIX / NOTE(review): several keys had degenerated to empty strings
# (the full-width CJK punctuation was lost in encoding). An empty-string
# key makes str.replace("", x) insert x between every character, corrupting
# every name, and duplicate "" keys collapse in a dict literal. The keys
# below are reconstructed from the ASCII values — confirm against upstream.
replace_chars = {
    "【": "[",
    "】": "]",
    "：": ":",
    "［": "[",
    "］": "]",
    "－": "-",
    "（": "(",
    "）": ")",
    "＆": "&",
    "X": "x",
    "×": "x",
    "✕": "x",
    "__": "/",
    "\n": "",
}


class CharStandardize:
    """Normalizes full-width punctuation in a release name to ASCII."""

    def preprocess(self, name):
        # Apply each single replacement in mapping order.
        for old, new in replace_chars.items():
            name = name.replace(old, new)
        return name

View File

@@ -0,0 +1,41 @@
# Delimiters checked in priority order (" - " must win over a bare space).
delimiters = ["[", "]", " - ", " "]


class TokenGenerator:
    """Splits a release name into stripped tokens on bracket/space delimiters."""

    def _get_tokens(self, name):
        """Scan *name* once, emitting the stripped text between delimiters."""
        tokens = []
        start = 0
        cursor = 0
        length = len(name)
        while cursor <= length:
            matched = None
            for delim in delimiters:
                if name[cursor : cursor + len(delim)] == delim:
                    matched = delim
                    break
            if matched is not None:
                # Close the pending token (if any) and skip the delimiter.
                if start < cursor:
                    tokens.append(name[start:cursor].strip())
                cursor += len(matched)
                start = cursor
            elif cursor == length:
                # End of input: flush whatever is left.
                if start < cursor:
                    tokens.append(name[start:cursor].strip())
                break
            else:
                cursor += 1
        return tokens

    def generate(self, s):
        """Return the token list for string *s*."""
        return self._get_tokens(s)

View File

@@ -1,151 +0,0 @@
# -*- coding: UTF-8 -*-
import os
import logging
import requests
from bs4 import BeautifulSoup
import json
import re
from conf import settings
from utils import json_config
# from RssFilter.RSSFilter import RSSInfoCleaner as Filter
logger = logging.getLogger(__name__)
class MatchRule:
    """Regex fragments used by CollectRSS (legacy module, deleted in this
    commit) to split a release name and extract title/season information."""

    # Characters that delimit segments in a release name.
    split_rule = r"\[|\]|\【|\】|\★|\|\|\(|\)"
    # Captures everything before a trailing " -" marker.
    last_rule = r"(.*)( \-)"
    # Non-ASCII runs / stray digits stripped when deriving a candidate title.
    sub_title = r"[^\x00-\xff]{1,}| \d{1,2}^.*|\·"
    # "S01 ..." style: group(2) is the text after the season tag.
    match_rule = r"(S\d{1,2}(.*))"
    # Splits "<title> <season marker>".
    season_match = r"(.*)(Season \d{1,2}|S\d{1,2}|第.*季|第.*期)"
    # First run of digits inside a season marker.
    season_number_match = r"(\d+)"
class CollectRSS:
    """Legacy collector (deleted in this commit): fetches the RSS feed once
    at construction and merges matching items into the info JSON file."""

    def __init__(self):
        self.bangumi_list = []
        # Cached rules are the fallback; prefer a fresh copy from rule_url.
        self.rules = json_config.load(settings.rule_path)
        try:
            self.rules = requests.get(settings.rule_url).json()
        except Exception as e:
            logger.exception(e)
        json_config.save(settings.rule_path, self.rules)
        try:
            rss = requests.get(settings.rss_link, "utf-8")
        except Exception as e:
            logger.exception(e)
            logger.error("ERROR with DNS/Connection.")
            quit()
        soup = BeautifulSoup(rss.text, "xml")
        self.items = soup.find_all("item")
        self.info = json_config.load(settings.info_path)

    def get_info_list(self):
        """Extract {title, group} pairs from the feed items into bangumi_list."""
        for item in self.items:
            name = item.title.string
            # for debugging
            if settings.get_rule_debug:
                logger.debug(f"Raw {name}")
            exit_flag = False
            for rule in self.rules:
                for group in rule["group_name"]:
                    if re.search(group, name):
                        exit_flag = True
                        n = re.split(MatchRule.split_rule, name)
                        while "" in n:
                            n.remove("")
                        while " " in n:
                            n.remove(" ")
                        try:
                            bangumi_title = n[rule["name_position"]].strip()
                        except IndexError:
                            continue
                        sub_title = re.sub(MatchRule.sub_title, "", bangumi_title)
                        b = re.split(r"\/|\_", sub_title)
                        while "" in b:
                            b.remove("")
                        pre_name = max(b, key=len, default="").strip()
                        if len(pre_name.encode()) > 3:
                            bangumi_title = pre_name
                        for i in range(2):
                            match_obj = re.match(
                                MatchRule.last_rule, bangumi_title, re.I
                            )
                            if match_obj is not None:
                                bangumi_title = match_obj.group(1).strip()
                            match_obj = re.match(MatchRule.match_rule, bangumi_title, re.I)
                            if match_obj is not None:
                                bangumi_title = match_obj.group(2).strip()
                        # BUG FIX: the old check tested a string against a
                        # list of dicts, so it never deduplicated.
                        if all(d["title"] != bangumi_title for d in self.bangumi_list):
                            self.bangumi_list.append(
                                {"title": bangumi_title, "group": group}
                            )
                        break
                if exit_flag:
                    break
            if not exit_flag:
                # BUG FIX: the f-prefix was missing here.
                logger.debug(f"ERROR Not match with {name}")

    def put_info_json(self):
        """Merge bangumi_list into the info JSON; reset it when the RSS link
        has changed since the file was written."""
        had_data = []
        if self.info["rss_link"] == settings.rss_link:
            for data in self.info["bangumi_info"]:
                had_data.append(data["title"])
        else:
            self.info = {"rss_link": settings.rss_link, "bangumi_info": []}
        for item in self.bangumi_list:
            title = item["title"]
            match_title_season = re.match(MatchRule.season_match, title, re.I)
            if match_title_season is not None:
                json_title = match_title_season.group(1).strip()
                json_season = match_title_season.group(2)
                match_season_number = re.findall(
                    MatchRule.season_number_match, json_season
                )
                if len(match_season_number) != 0:
                    json_season_number = int(match_season_number[0])
                else:
                    logger.warning(
                        f"title:{title} season:{json_season} can't match season in number"
                    )
                    json_season_number = 1
            else:
                json_season = "S01"
                json_season_number = 1
                json_title = title
            if json_title not in had_data:
                self.info["bangumi_info"].append(
                    {
                        "title": json_title,
                        "season": json_season,
                        "season_number": json_season_number,
                        "group": item["group"],
                        "added": False,
                    }
                )
                had_data.append(json_title)
                # BUG FIX: the f-prefix was missing here.
                logger.debug(f"add {json_title} {json_season}")
        json_config.save(settings.info_path, self.info)

    def run(self):
        """Collect from the feed and persist the merged info file."""
        self.get_info_list()
        self.put_info_json()
if __name__ == "__main__":
    # Ad-hoc debugging snippet; the commented block exercised the RSS filter.
    # from const import BCOLORS
    # rss = requests.get(settings.rss_link, 'utf-8')
    # soup = BeautifulSoup(rss.text, 'xml')
    # items = soup.find_all('item')
    # for item in items:
    # name = item.title.string
    # pn = Filter(name).Name
    # print(BCOLORS.HEADER + name)
    # print(BCOLORS.OKGREEN + str(pn.zh))
    # print(str(pn.en))
    print(__file__)
    print(os.path.dirname(__file__))

View File

@@ -38,14 +38,18 @@ ENV_TO_ATTR = {
FULL_SEASON_SUPPORT_GROUP = ["Lilith-Raws"]
# ANSI terminal color escape sequences, keyed by semantic name.
BCOLORS = {
    "HEADER": "\033[95m",
    "OKBLUE": "\033[94m",
    "OKCYAN": "\033[96m",
    "OKGREEN": "\033[92m",
    "WARNING": "\033[93m",
    "FAIL": "\033[91m",
    "ENDC": "\033[0m",
    "BOLD": "\033[1m",
    "UNDERLINE": "\033[4m",
}
class BCOLORS:
    """ANSI escape codes for colored terminal output."""

    HEADER = "\033[95m"
    OKBLUE = "\033[94m"
    OKCYAN = "\033[96m"
    OKGREEN = "\033[92m"
    WARNING = "\033[93m"
    FAIL = "\033[91m"
    ENDC = "\033[0m"
    BOLD = "\033[1m"
    UNDERLINE = "\033[4m"

    @staticmethod
    def _(color: str, string: str) -> str:
        """Wrap *string* in *color* and reset the terminal afterwards."""
        return "".join((color, str(string), BCOLORS.ENDC))

View File

@@ -1,5 +1,5 @@
DEV_SETTINGS = {
"host_ip": "localhost:8181",
"host_ip": "qb.findix.cn",
"sleep_time": 10,
"info_path": "../config/bangumi.json",
"rule_path": "../config/rule.json",

View File

View File

@@ -11,12 +11,8 @@ from utils import json_config
logger = logging.getLogger(__name__)
class SetRule:
class DownloadClient:
def __init__(self):
self.info = json_config.load(settings.info_path)
self.bangumi_info = self.info["bangumi_info"]
self.rss_link = settings.rss_link
self.download_path = settings.download_path
self.client = getClient()
def set_rule(self, bangumi_name, group, season):
@@ -28,7 +24,7 @@ class SetRule:
"episodeFilter": "",
"smartFilter": False,
"previouslyMatchedEpisodes": [],
"affectedFeeds": [self.rss_link],
"affectedFeeds": [settings.rss_link],
"ignoreDays": 0,
"lastMatch": "",
"addPaused": False,
@@ -53,23 +49,33 @@ class SetRule:
except ConflictError:
logger.debug("No feed exists, starting adding feed.")
try:
self.client.rss_add_feed(url=self.rss_link, item_path="Mikan_RSS")
self.client.rss_add_feed(url=settings.rss_link, item_path="Mikan_RSS")
logger.debug("Successes adding RSS Feed.")
except ConnectionError:
logger.debug("Error with adding RSS Feed.")
except ConflictError:
logger.debug("RSS Already exists.")
def run(self):
def add_rules(self, bangumi_info):
logger.debug("Start adding rules.")
for info in self.bangumi_info:
for info in bangumi_info:
if not info["added"]:
self.set_rule(info["title"], info["group"], info["season"])
info["added"] = True
json_config.save(settings.info_path, self.info)
logger.debug("Finished.")
def get_torrent_info(self):
return self.client.torrents_info(
status_filter="completed", category="Bangumi"
)
def rename_torrent_file(self, hash, path_name, new_name):
self.client.torrents_rename_file(
torrent_hash=hash, old_path=path_name, new_path=new_name
)
logger.debug(f"{path_name} >> {new_name}")
if __name__ == "__main__":
put = SetRule()
put.run()
put = DownloadClient()
put.add_rules()

View File

@@ -0,0 +1,70 @@
import re
import logging
from core.download_client import DownloadClient
from conf import settings
logger = logging.getLogger(__name__)
# Episode-number extraction patterns: group(1)=prefix, group(2)=number,
# group(3)=suffix.
rules = [
    r"(.*)\[(\d{1,3}|\d{1,3}\.\d{1,2})(?:v\d{1,2})?(?:END)?\](.*)",
    r"(.*)\[E(\d{1,3}|\d{1,3}\.\d{1,2})(?:v\d{1,2})?(?:END)?\](.*)",
    r"(.*)\[第(\d*\.*\d*)话(?:END)?\](.*)",
    r"(.*)\[第(\d*\.*\d*)話(?:END)?\](.*)",
    r"(.*)第(\d*\.*\d*)话(?:END)?(.*)",
    r"(.*)第(\d*\.*\d*)話(?:END)?(.*)",
    r"(.*)- (\d{1,3}|\d{1,3}\.\d{1,2})(?:v\d{1,2})?(?:END)? (.*)",
]


class Renamer:
    """Renames completed "Bangumi" torrents to a uniform "<title> E<ep>" form."""

    def __init__(self, downloadClient: "DownloadClient"):
        self.client = downloadClient
        # BUG FIX: compile case-insensitively here. Previously re.I was
        # passed as the second argument of Pattern.match(), which is the
        # *pos* offset (re.I == 2) — every match silently started at
        # character index 2 of the name.
        self.rules = [re.compile(rule, re.I) for rule in rules]

    def rename_normal(self, name):
        """Return "<title> E<num><rest>" for the first matching rule, else None."""
        for rule in self.rules:
            matchObj = rule.match(name)
            if matchObj is not None:
                return f"{matchObj.group(1).strip()} E{matchObj.group(2)}{matchObj.group(3)}"

    def rename_pn(self, name):
        """Variant that drops the leading [group] tag and all brackets.

        Returns None when no rule matches.
        """
        n = re.split(r"\[|\]", name)
        file_name = name.replace(f"[{n[1]}]", "")
        for rule in self.rules:
            matchObj = rule.match(file_name)
            if matchObj is not None:
                return re.sub(
                    r"\[|\]",
                    "",
                    f"{matchObj.group(1).strip()} E{matchObj.group(2)}{n[-1]}",
                )

    def print_result(self, torrent_count, rename_count):
        logger.debug(f"已完成对{torrent_count}个文件的检查")
        logger.debug(f"已对其中{rename_count}个文件进行重命名")
        logger.debug(f"完成")

    def run(self):
        """Check every completed torrent and rename via the configured method."""
        recent_info = self.client.get_torrent_info()
        rename_count = 0
        torrent_count = len(recent_info)
        method_dict = {"pn": self.rename_pn, "normal": self.rename_normal}
        if settings.method not in method_dict:
            logger.error(f"error method")
        else:
            for info in recent_info:
                name = None
                try:
                    name = info.name
                    hash = info.hash
                    path_name = info.content_path.split("/")[-1]
                    new_name = method_dict[settings.method](name)
                    # BUG FIX: only hit the API when a rule actually matched
                    # (new_name was None for unmatched names) and the name changed.
                    if new_name is not None and path_name != new_name:
                        self.client.rename_torrent_file(hash, path_name, new_name)
                        rename_count += 1
                except Exception:
                    logger.warning(f"{name} rename fail")
            self.print_result(torrent_count, rename_count)

View File

@@ -0,0 +1,45 @@
# -*- coding: UTF-8 -*-
import os
import logging
import requests
from bs4 import BeautifulSoup
from conf import settings
from bangumi_parser.analyser.simple_analyser import SimpleAnalyser
logger = logging.getLogger(__name__)
class RSSCollector:
    """Fetches the configured RSS feed and merges newly seen bangumi into
    the shared data dict."""

    def __init__(self):
        self._simple_analyser = SimpleAnalyser()

    def collect(self, bangumi_data):
        """Parse every RSS item and append unseen titles to
        bangumi_data["bangumi_info"] (mutated in place)."""
        try:
            req = requests.get(settings.rss_link, "utf-8")
        except Exception as e:
            logger.exception(e)
            logger.error("ERROR with DNS/Connection.")
            # BUG FIX: without this return, `req` was unbound below and the
            # logged error was immediately followed by a NameError.
            return
        rss = BeautifulSoup(req.text, "xml")
        items = rss.find_all("item")
        for item in items:
            name = item.title.string
            # for debugging
            if settings.get_rule_debug:
                logger.debug(f"Raw {name}")
            episode = self._simple_analyser.analyse(name)
            if episode:
                title, group, season = episode.title, episode.group, episode.season_info.raw
                for d in bangumi_data["bangumi_info"]:
                    if d["title"] == title:
                        break
                else:
                    bangumi_data["bangumi_info"].append(
                        {
                            "title": title,
                            "season": season,
                            "group": group,
                            "added": False,
                        }
                    )
                    # BUG FIX: was a plain string referencing variables
                    # (json_title/json_season) that do not exist here.
                    logger.debug(f"add {title} {season}")

View File

@@ -7,7 +7,7 @@ from tomlkit import item
from conf import settings
from exceptions import ConflictError
from downloader.exceptions import ConflictError
logger = logging.getLogger(__name__)

View File

@@ -10,6 +10,3 @@ def setup_logger():
format=LOGGING_FORMAT,
encoding="utf-8",
)
setup_logger()

View File

@@ -1,66 +0,0 @@
import re
import requests
class Parser:
    """Legacy release-name parser (deleted in this commit).

    NOTE(review): this module references `settings`, `logger` and
    `json_config` without importing them, so the rule-refresh path in
    __init__ raises NameError at runtime — kept as-is since the file is
    being removed.
    """

    # TODO: bangumi title recognition
    def __init__(self, info):
        self.raw_name = info
        self.name = None
        self.season = None
        self.episode = None
        self.group = None
        self.dpi = None
        self.language = None
        try:
            self.rules = requests.get(settings.rule_url).json()
        except Exception as e:
            logger.exception(e)
        json_config.save(settings.rule_path, self.rules)

    # First style of subtitle-group naming.
    def parser_type_1(self):
        # Drop the leading "[group]" / "【group】" tag.
        name_re_group = re.sub(r"^[^(\]|】)]*(\]|】)", "", self.raw_name).strip()
        # Split "<title part>( - 03 | [03])<rest>".
        match_obj = re.match(r"(.*|\[.*])( - \d{1,3}|\[\d{1,3}])(.*)", name_re_group)
        name_season = match_obj.group(1).strip()
        if re.search(r"S\d{1,2}", name_season) is not None:
            split = re.sub(r"S\d{1,2}", "", name_season).split("/")
            self.season = re.findall(r"S\d{1,2}", name_season)[0]
        else:
            split = name_season.split("/")
            self.season = "S01"
        try:
            self.name = split[1].strip()
        except IndexError:
            self.name = split[-1].strip()
        self.episode = int(re.sub(r"\-|\[|\]", "", match_obj.group(2)))
        other = match_obj.group(3).strip()  # NOTE(review): unused
        language = None  # NOTE(review): unused placeholder

    def parser_type_2(self):
        # Placeholder implementation.
        self.name = "name"

    def parser_type_3(self):
        # Placeholder implementation.
        self.name = "name"

    def method(self, method):
        # Dispatch on the rule "type"; unknown types are silently ignored.
        if method == 1:
            self.parser_type_1()
        elif method == 2:
            self.parser_type_2()
        elif method == 3:
            self.parser_type_3()

    def split_info(self):
        # Apply the first rule whose group pattern occurs in the raw name.
        break_flag = False
        for rule in self.rules:
            for group in rule["group"]:
                if re.search(group, self.raw_name):
                    self.method(rule["type"])
                    self.group = group
                    break_flag = True
                    break
            if break_flag:
                break

View File

@@ -1,80 +0,0 @@
import re
import qbittorrentapi
import logging
from downloader import getClient
from conf import settings
logger = logging.getLogger(__name__)
class qBittorrentRename:
    """Legacy renamer (deleted in this commit, superseded by core.renamer):
    renames completed "Bangumi" torrents to "<title> E<episode>"."""

    def __init__(self):
        self.client = getClient()
        # Only finished torrents in the Bangumi category are candidates.
        self.recent_info = self.client.torrents_info(
            status_filter="completed", category="Bangumi"
        )
        self.count = 0
        self.rename_count = 0
        self.torrent_count = len(self.recent_info)
        rules = [
            r"(.*)\[(\d{1,3}|\d{1,3}\.\d{1,2})(?:v\d{1,2})?(?:END)?\](.*)",
            r"(.*)\[E(\d{1,3}|\d{1,3}\.\d{1,2})(?:v\d{1,2})?(?:END)?\](.*)",
            r"(.*)\[第(\d*\.*\d*)话(?:END)?\](.*)",
            r"(.*)\[第(\d*\.*\d*)話(?:END)?\](.*)",
            r"(.*)第(\d*\.*\d*)话(?:END)?(.*)",
            r"(.*)第(\d*\.*\d*)話(?:END)?(.*)",
            r"(.*)- (\d{1,3}|\d{1,3}\.\d{1,2})(?:v\d{1,2})?(?:END)? (.*)",
        ]
        # BUG FIX: compile case-insensitively here. Previously re.I was
        # passed as Pattern.match()'s second argument, which is the *pos*
        # offset (re.I == 2) — matching silently started at index 2.
        self.rules = [re.compile(rule, re.I) for rule in rules]

    def rename_normal(self, name):
        """Return the normalized name for the first matching rule, else None."""
        for rule in self.rules:
            matchObj = rule.match(name)
            if matchObj is not None:
                return f"{matchObj.group(1).strip()} E{matchObj.group(2)}{matchObj.group(3)}"

    def rename_pn(self, name):
        """Normalize after stripping the leading [group] tag; brackets removed.

        Returns None when no rule matches.
        """
        n = re.split(r"\[|\]", name)
        file_name = name.replace(f"[{n[1]}]", "")
        for rule in self.rules:
            matchObj = rule.match(file_name)
            if matchObj is not None:
                return re.sub(
                    r"\[|\]",
                    "",
                    f"{matchObj.group(1).strip()} E{matchObj.group(2)}{n[-1]}",
                )

    def rename_torrent_file(self, hash, path_name, new_name):
        """Rename one torrent's file when the new name differs; counts successes."""
        # BUG FIX: guard against new_name being None (no rule matched).
        if new_name is not None and path_name != new_name:
            self.client.torrents_rename_file(
                torrent_hash=hash, old_path=path_name, new_path=new_name
            )
            logger.debug(f"{path_name} >> {new_name}")
            self.count += 1

    def print_result(self):
        logger.debug(f"已完成对{self.torrent_count}个文件的检查")
        logger.debug(f"已对其中{self.count}个文件进行重命名")
        logger.debug(f"完成")

    def run(self):
        """Apply the configured rename method to every candidate torrent."""
        method_dict = {"pn": self.rename_pn, "normal": self.rename_normal}
        if settings.method not in method_dict:
            logger.error(f"error method")
        else:
            for i in range(0, self.torrent_count):
                name = None
                try:
                    info = self.recent_info[i]
                    name = info.name
                    hash = info.hash
                    path_name = info.content_path.split("/")[-1]
                    new_name = method_dict[settings.method](name)
                    self.rename_torrent_file(hash, path_name, new_name)
                except Exception:
                    logger.warning(f"{name} rename fail")
            self.print_result()