2.5.0-pre2

This commit is contained in:
EstrellaXD
2022-06-11 13:50:26 +08:00
parent 65748d4808
commit ecbc80a558
8 changed files with 58 additions and 10112 deletions

View File

@@ -74,7 +74,7 @@ def run():
quit()
download_client.rss_feed()
rss_collector = RSSCollector()
rename = Renamer()
rename = Renamer(download_client)
while True:
bangumi_data = load_data_file()
try:

View File

@@ -103,6 +103,7 @@ class ParserLV2:
def process(self, raw_name):
raw_name = self.pre_process(raw_name)
self.get_group(raw_name)
match_obj = re.match(
r"(.*|\[.*])( -? \d{1,3} |\[\d{1,3}]|\[\d{1,3}.?[vV]\d{1}]|[第第]\d{1,3}[话話集集]|\[\d{1,3}.?END])(.*)",
raw_name,

View File

@@ -1,28 +1,38 @@
from thefuzz import fuzz
import logging
from utils import json_config
logger = logging.getLogger(__name__)
class FuzzMatch:
def __init__(self, anidb_data):
self.match_data = anidb_data
def __init__(self):
self.match_data = json_config.load("/Users/Estrella/Developer/Bangumi_Auto_Collector/resource/season_data.json")
def match(self, title, info: dict):
compare_value: list = []
compare_value = []
for type in ["main", "en", "ja", "zh-Hans", "zh-Hant"]:
if info[type] is not None:
a = fuzz.ratio(title.replace(" ", "").lower(), info[type].replace(" ", "").lower())
a = fuzz.token_sort_ratio(title.lower(), info[type].lower())
compare_value.append(a)
for compare in info["other"]:
a = fuzz.ratio(title.replace(" ", "").lower(), compare.replace(" ", "").lower())
a = fuzz.token_sort_ratio(title.lower(), compare.lower())
compare_value.append(a)
return max(compare_value)
def find_max_name(self, title):
value: list = []
max_value = 0
max_info = None
for info in self.match_data:
a = self.match(title, info)
value.append([a, info])
logger.debug(max(value))
return max(value)
if a > max_value:
max_value = a
max_info = info
return max_value, max_info["main"]
# logger.debug(max(value))
if __name__ == "__main__":
f = FuzzMatch()
value, title = f.find_max_name("辉夜大小姐想让我告白")
print(value,title)

View File

@@ -18,7 +18,8 @@ DEFAULT_SETTINGS = {
"dev_debug": False,
"data_version": 3.1,
"enable_eps_complete": False,
"first_sleep": 600
"first_sleep": 600,
"webui_port": 7892
}
ENV_TO_ATTR = {
@@ -38,7 +39,8 @@ ENV_TO_ATTR = {
),
"AB_SEASON_ONE": ("season_one_tag", lambda e: e.lower() in ("true", "1", "t")),
"AB_REMOVE_BAD_BT": ("remove_bad_torrent", lambda e: e.lower() in ("true", "1", "t")),
"AB_FIRST_SLEEP": "first_sleep"
"AB_FIRST_SLEEP": ("first_sleep", lambda e: float(e)),
"AB_WEBUI_PORT": ("webui_port", lambda e: int(e))
}

View File

@@ -30,10 +30,10 @@ class DownloadClient:
prefs = self.client.get_app_prefs()
settings.download_path = os.path.join(prefs["save_path"], "Bangumi")
def set_rule(self, bangumi_name, group, season, rss):
def set_rule(self, official_name, raw_name, group, season, rss):
rule = {
"enable": True,
"mustContain": bangumi_name,
"mustContain": raw_name,
"mustNotContain": settings.not_contain,
"useRegex": True,
"episodeFilter": "",
@@ -47,12 +47,12 @@ class DownloadClient:
"savePath": str(
os.path.join(
settings.download_path,
re.sub(settings.rule_name_re, " ", bangumi_name).strip(),
re.sub(settings.rule_name_re, " ", official_name).strip(),
season,
)
),
}
rule_name = f"[{group}] {bangumi_name}" if settings.enable_group_tag else bangumi_name
rule_name = f"[{group}] {official_name}" if settings.enable_group_tag else official_name
self.client.rss_set_rule(rule_name=rule_name, rule_def=rule)
def rss_feed(self):
@@ -72,11 +72,11 @@ class DownloadClient:
self.client.rss_add_feed(url=rss_link)
logger.info("Add RSS Feed successfully.")
def add_rules(self, bangumi_info):
def add_rules(self, bangumi_info, rss_link=settings.rss_link):
logger.info("Start adding rules.")
for info in bangumi_info:
if not info["added"]:
self.set_rule(info["title"], info["group"], info["season"], settings.rss_link)
self.set_rule(info["title"], info["title_raw"], info["group"], info["season"], rss_link)
info["added"] = True
logger.info("Finished.")

View File

@@ -6,6 +6,7 @@ from bs4 import BeautifulSoup
from conf import settings
from bangumi_parser.analyser.rss_parser import ParserLV2
from bangumi_parser.fuzz_match import FuzzMatch
logger = logging.getLogger(__name__)
@@ -13,6 +14,7 @@ logger = logging.getLogger(__name__)
class RSSCollector:
def __init__(self):
self._simple_analyser = ParserLV2()
self._fuzz_match = FuzzMatch()
def get_rss_info(self, rss_link):
try:
@@ -20,9 +22,32 @@ class RSSCollector:
rss = BeautifulSoup(req.text, "xml")
return rss
except Exception as e:
logger.exception(e)
# logger.exception(e)
logger.error("ERROR with DNS/Connection.")
def title_parser(self, title):
episode = self._simple_analyser.analyse(title)
if episode:
group, title_raw, season, ep = episode.group, episode.title, episode.season_info, episode.ep_info
sub, dpi, source = episode.subtitle, episode.dpi, episode.source
if ep.number > 1 and settings.enable_eps_complete:
download_past = True
else:
download_past = False
match_value, title_official = self._fuzz_match.find_max_name(title_raw)
data = {
"title": title_official if match_value > 55 else title_raw,
"title_raw": title_raw,
"season": season.raw,
"group": group,
"subtitle": sub,
"source": source,
"dpi": dpi,
"added": False,
"download_past": download_past
}
return episode, data
def collect(self, bangumi_data):
rss = self.get_rss_info(settings.rss_link)
items = rss.find_all("item")
@@ -38,27 +63,6 @@ class RSSCollector:
bangumi_data["bangumi_info"].append(data)
logger.info(f"Adding {episode.title} Season {episode.season_info.number}")
def title_parser(self, title):
episode = self._simple_analyser.analyse(title)
if episode:
group, title, season, ep = episode.group, episode.title, episode.season_info, episode.ep_info
sub, dpi, source = episode.subtitle, episode.dpi, episode.source
if ep.number > 1 and settings.enable_eps_complete:
download_past = True
else:
download_past = False
data = {
"title": title,
"season": season.raw,
"group": group,
"subtitle": sub,
"source": source,
"dpi": dpi,
"added": False,
"download_past": download_past
}
return episode, data
def collect_collection(self, rss_link):
rss = self.get_rss_info(rss_link)
item = rss.find("item")
@@ -69,5 +73,4 @@ class RSSCollector:
if __name__ == "__main__":
rss = RSSCollector()
data = rss.collect_collection("https://mikanani.me/RSS/Classic")
print(data)
data = rss.get_rss_info("https://mikanasni.me/RSS/Classic")

View File

@@ -1,70 +0,0 @@
#! /usr/bin/python
import re
import time
import requests
from bs4 import BeautifulSoup
from utils import json_config
from const import BCOLORS
header = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) ApplewebKit/537.36 (KHtml, like Gecko) Chrome/80.0.3987.163 Safari/537.36',
}
def get_html(url):
    """Fetch *url* with the module-level browser-style `header` and return the response body as text."""
    requests.adapters.DEFAULT_RETRIES = 5  # increase reconnect attempts — NOTE(review): this mutates a module-wide global; verify requests actually honors it for retries
    s = requests.session()
    s.keep_alive = False  # close surplus connections instead of pooling them
    html = s.get(url=url, headers=header).text
    print("get html success")
    return html
def get_list(year, season):
    """Return the AniDB anime ids listed on the season page for *year*/*season*.

    *season* is 1-4 mapping to spring/summer/autumn/winter.
    NOTE(review): a caller passing 0 wraps to "winter" via negative indexing —
    confirm that is intended.
    """
    season = ["spring", "summer", "autumn", "winter"][season - 1]
    url = "https://anidb.net/anime/season/%s/%s/" % (year, season)
    html = get_html(url)
    # Raw string: "\d" inside a plain literal is an invalid escape sequence
    # (DeprecationWarning since Python 3.6, slated to become a SyntaxError).
    ids = re.findall(r"<a href=\"/anime/(\d+)\"><picture>", html)
    return ids
def get_title(id):
    """Fetch the AniDB title record for anime *id* and group its names by language.

    Returns a dict with the "main" title, official titles for
    en/zh-Hans/zh-Hant/ja, and synonyms collected under "other".
    Titles in other languages or of other types are skipped.
    """
    url = f"http://api.anidb.net:9001/httpapi?request=anime&client=autobangumi&clientver=1&protover=1&aid={id}"
    req = requests.get(url)
    soup = BeautifulSoup(req.text, "xml")
    titles = soup.titles.find_all("title")
    all_title_info = {
        "id": id,
        "main": None,
        "en": None,
        "zh-Hans": None,
        "zh-Hant": None,
        "ja": None,
        "other": []
    }
    for title in titles:
        if title["type"] == "main":
            all_title_info["main"] = title.string
        elif title["type"] == "official":
            if title["xml:lang"] in ["en", "zh-Hant", "zh-Hans", "ja"]:
                all_title_info[title["xml:lang"]] = title.string
            else:
                # BUG FIX: was `break`, which aborted the whole scan as soon as
                # one official title used a non-target language, dropping every
                # remaining title. Skip just this entry instead.
                continue
        elif title["type"] == "synonym":
            all_title_info["other"].append(title.string)
        else:
            # BUG FIX: was `break` — skip unknown title types rather than
            # terminating the loop early.
            continue
    return all_title_info
if __name__ == "__main__":
    # Scrape three seasons' worth of 2022 AniDB title data and dump it to JSON.
    json = []  # NOTE(review): shadows the stdlib `json` module name — harmless here since only json_config is used, but worth renaming
    for i in [0, 1, 2]:
        # NOTE(review): get_list indexes its season list with `season - 1`, so
        # i=0 wraps to "winter" via negative indexing — confirm intended.
        ids = get_list(2022, i)
        for id in ids:
            data = get_title(id)
            print(data)
            time.sleep(2.5)  # throttle requests to the AniDB HTTP API
            json.append(data)
    json_config.save("season_winter.json", json)

File diff suppressed because it is too large Load Diff