mirror of
https://github.com/EstrellaXD/Auto_Bangumi.git
synced 2026-04-14 02:20:53 +08:00
2.5.0-pre2
This commit is contained in:
@@ -74,7 +74,7 @@ def run():
|
||||
quit()
|
||||
download_client.rss_feed()
|
||||
rss_collector = RSSCollector()
|
||||
rename = Renamer()
|
||||
rename = Renamer(download_client)
|
||||
while True:
|
||||
bangumi_data = load_data_file()
|
||||
try:
|
||||
|
||||
@@ -103,6 +103,7 @@ class ParserLV2:
|
||||
|
||||
def process(self, raw_name):
|
||||
raw_name = self.pre_process(raw_name)
|
||||
self.get_group(raw_name)
|
||||
match_obj = re.match(
|
||||
r"(.*|\[.*])( -? \d{1,3} |\[\d{1,3}]|\[\d{1,3}.?[vV]\d{1}]|[第第]\d{1,3}[话話集集]|\[\d{1,3}.?END])(.*)",
|
||||
raw_name,
|
||||
|
||||
@@ -1,28 +1,38 @@
|
||||
from thefuzz import fuzz
|
||||
import logging
|
||||
from utils import json_config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FuzzMatch:
|
||||
def __init__(self, anidb_data):
|
||||
self.match_data = anidb_data
|
||||
def __init__(self):
|
||||
self.match_data = json_config.load("/Users/Estrella/Developer/Bangumi_Auto_Collector/resource/season_data.json")
|
||||
|
||||
def match(self, title, info: dict):
|
||||
compare_value: list = []
|
||||
compare_value = []
|
||||
for type in ["main", "en", "ja", "zh-Hans", "zh-Hant"]:
|
||||
if info[type] is not None:
|
||||
a = fuzz.ratio(title.replace(" ", "").lower(), info[type].replace(" ", "").lower())
|
||||
a = fuzz.token_sort_ratio(title.lower(), info[type].lower())
|
||||
compare_value.append(a)
|
||||
for compare in info["other"]:
|
||||
a = fuzz.ratio(title.replace(" ", "").lower(), compare.replace(" ", "").lower())
|
||||
a = fuzz.token_sort_ratio(title.lower(), compare.lower())
|
||||
compare_value.append(a)
|
||||
return max(compare_value)
|
||||
|
||||
def find_max_name(self, title):
|
||||
value: list = []
|
||||
max_value = 0
|
||||
max_info = None
|
||||
for info in self.match_data:
|
||||
a = self.match(title, info)
|
||||
value.append([a, info])
|
||||
logger.debug(max(value))
|
||||
return max(value)
|
||||
if a > max_value:
|
||||
max_value = a
|
||||
max_info = info
|
||||
return max_value, max_info["main"]
|
||||
# logger.debug(max(value))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
f = FuzzMatch()
|
||||
value, title = f.find_max_name("辉夜大小姐想让我告白")
|
||||
print(value,title)
|
||||
@@ -18,7 +18,8 @@ DEFAULT_SETTINGS = {
|
||||
"dev_debug": False,
|
||||
"data_version": 3.1,
|
||||
"enable_eps_complete": False,
|
||||
"first_sleep": 600
|
||||
"first_sleep": 600,
|
||||
"webui_port": 7892
|
||||
}
|
||||
|
||||
ENV_TO_ATTR = {
|
||||
@@ -38,7 +39,8 @@ ENV_TO_ATTR = {
|
||||
),
|
||||
"AB_SEASON_ONE": ("season_one_tag", lambda e: e.lower() in ("true", "1", "t")),
|
||||
"AB_REMOVE_BAD_BT": ("remove_bad_torrent", lambda e: e.lower() in ("true", "1", "t")),
|
||||
"AB_FIRST_SLEEP": "first_sleep"
|
||||
"AB_FIRST_SLEEP": ("first_sleep", lambda e: float(e)),
|
||||
"AB_WEBUI_PORT": ("webui_port", lambda e: int(e))
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -30,10 +30,10 @@ class DownloadClient:
|
||||
prefs = self.client.get_app_prefs()
|
||||
settings.download_path = os.path.join(prefs["save_path"], "Bangumi")
|
||||
|
||||
def set_rule(self, bangumi_name, group, season, rss):
|
||||
def set_rule(self, official_name, raw_name, group, season, rss):
|
||||
rule = {
|
||||
"enable": True,
|
||||
"mustContain": bangumi_name,
|
||||
"mustContain": raw_name,
|
||||
"mustNotContain": settings.not_contain,
|
||||
"useRegex": True,
|
||||
"episodeFilter": "",
|
||||
@@ -47,12 +47,12 @@ class DownloadClient:
|
||||
"savePath": str(
|
||||
os.path.join(
|
||||
settings.download_path,
|
||||
re.sub(settings.rule_name_re, " ", bangumi_name).strip(),
|
||||
re.sub(settings.rule_name_re, " ", official_name).strip(),
|
||||
season,
|
||||
)
|
||||
),
|
||||
}
|
||||
rule_name = f"[{group}] {bangumi_name}" if settings.enable_group_tag else bangumi_name
|
||||
rule_name = f"[{group}] {official_name}" if settings.enable_group_tag else official_name
|
||||
self.client.rss_set_rule(rule_name=rule_name, rule_def=rule)
|
||||
|
||||
def rss_feed(self):
|
||||
@@ -72,11 +72,11 @@ class DownloadClient:
|
||||
self.client.rss_add_feed(url=rss_link)
|
||||
logger.info("Add RSS Feed successfully.")
|
||||
|
||||
def add_rules(self, bangumi_info):
|
||||
def add_rules(self, bangumi_info, rss_link=settings.rss_link):
|
||||
logger.info("Start adding rules.")
|
||||
for info in bangumi_info:
|
||||
if not info["added"]:
|
||||
self.set_rule(info["title"], info["group"], info["season"], settings.rss_link)
|
||||
self.set_rule(info["title"], info["title_raw"], info["group"], info["season"], rss_link)
|
||||
info["added"] = True
|
||||
logger.info("Finished.")
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ from bs4 import BeautifulSoup
|
||||
|
||||
from conf import settings
|
||||
from bangumi_parser.analyser.rss_parser import ParserLV2
|
||||
from bangumi_parser.fuzz_match import FuzzMatch
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -13,6 +14,7 @@ logger = logging.getLogger(__name__)
|
||||
class RSSCollector:
|
||||
def __init__(self):
|
||||
self._simple_analyser = ParserLV2()
|
||||
self._fuzz_match = FuzzMatch()
|
||||
|
||||
def get_rss_info(self, rss_link):
|
||||
try:
|
||||
@@ -20,9 +22,32 @@ class RSSCollector:
|
||||
rss = BeautifulSoup(req.text, "xml")
|
||||
return rss
|
||||
except Exception as e:
|
||||
logger.exception(e)
|
||||
# logger.exception(e)
|
||||
logger.error("ERROR with DNS/Connection.")
|
||||
|
||||
def title_parser(self, title):
|
||||
episode = self._simple_analyser.analyse(title)
|
||||
if episode:
|
||||
group, title_raw, season, ep = episode.group, episode.title, episode.season_info, episode.ep_info
|
||||
sub, dpi, source = episode.subtitle, episode.dpi, episode.source
|
||||
if ep.number > 1 and settings.enable_eps_complete:
|
||||
download_past = True
|
||||
else:
|
||||
download_past = False
|
||||
match_value, title_official = self._fuzz_match.find_max_name(title_raw)
|
||||
data = {
|
||||
"title": title_official if match_value > 55 else title_raw,
|
||||
"title_raw": title_raw,
|
||||
"season": season.raw,
|
||||
"group": group,
|
||||
"subtitle": sub,
|
||||
"source": source,
|
||||
"dpi": dpi,
|
||||
"added": False,
|
||||
"download_past": download_past
|
||||
}
|
||||
return episode, data
|
||||
|
||||
def collect(self, bangumi_data):
|
||||
rss = self.get_rss_info(settings.rss_link)
|
||||
items = rss.find_all("item")
|
||||
@@ -38,27 +63,6 @@ class RSSCollector:
|
||||
bangumi_data["bangumi_info"].append(data)
|
||||
logger.info(f"Adding {episode.title} Season {episode.season_info.number}")
|
||||
|
||||
def title_parser(self, title):
|
||||
episode = self._simple_analyser.analyse(title)
|
||||
if episode:
|
||||
group, title, season, ep = episode.group, episode.title, episode.season_info, episode.ep_info
|
||||
sub, dpi, source = episode.subtitle, episode.dpi, episode.source
|
||||
if ep.number > 1 and settings.enable_eps_complete:
|
||||
download_past = True
|
||||
else:
|
||||
download_past = False
|
||||
data = {
|
||||
"title": title,
|
||||
"season": season.raw,
|
||||
"group": group,
|
||||
"subtitle": sub,
|
||||
"source": source,
|
||||
"dpi": dpi,
|
||||
"added": False,
|
||||
"download_past": download_past
|
||||
}
|
||||
return episode, data
|
||||
|
||||
def collect_collection(self, rss_link):
|
||||
rss = self.get_rss_info(rss_link)
|
||||
item = rss.find("item")
|
||||
@@ -69,5 +73,4 @@ class RSSCollector:
|
||||
|
||||
if __name__ == "__main__":
|
||||
rss = RSSCollector()
|
||||
data = rss.collect_collection("https://mikanani.me/RSS/Classic")
|
||||
print(data)
|
||||
data = rss.get_rss_info("https://mikanasni.me/RSS/Classic")
|
||||
|
||||
@@ -1,70 +0,0 @@
|
||||
#! /usr/bin/python
|
||||
import re
|
||||
import time
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from utils import json_config
|
||||
from const import BCOLORS
|
||||
|
||||
header = {
|
||||
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) ApplewebKit/537.36 (KHtml, like Gecko) Chrome/80.0.3987.163 Safari/537.36',
|
||||
}
|
||||
|
||||
|
||||
def get_html(url):
|
||||
requests.adapters.DEFAULT_RETRIES = 5 # 增加重连次数
|
||||
s = requests.session()
|
||||
s.keep_alive = False # 关闭多余连接
|
||||
html = s.get(url=url, headers=header).text
|
||||
print("get html success")
|
||||
return html
|
||||
|
||||
|
||||
def get_list(year, season):
|
||||
season = ["spring", "summer", "autumn", "winter"][season - 1]
|
||||
url = "https://anidb.net/anime/season/%s/%s/" % (year, season)
|
||||
html = get_html(url)
|
||||
ids = re.findall("<a href=\"/anime/(\d+)\"><picture>", html)
|
||||
return ids
|
||||
|
||||
|
||||
def get_title(id):
|
||||
url = f"http://api.anidb.net:9001/httpapi?request=anime&client=autobangumi&clientver=1&protover=1&aid={id}"
|
||||
req = requests.get(url)
|
||||
soup = BeautifulSoup(req.text, "xml")
|
||||
titles = soup.titles.find_all("title")
|
||||
all_title_info = {
|
||||
"id": id,
|
||||
"main": None,
|
||||
"en": None,
|
||||
"zh-Hans": None,
|
||||
"zh-Hant": None,
|
||||
"ja": None,
|
||||
"other": []
|
||||
}
|
||||
for title in titles:
|
||||
if title["type"] == "main":
|
||||
all_title_info["main"] = title.string
|
||||
elif title["type"] == "official":
|
||||
if title["xml:lang"] in ["en", "zh-Hant", "zh-Hans", "ja"]:
|
||||
all_title_info[title["xml:lang"]] = title.string
|
||||
else:
|
||||
break
|
||||
elif title["type"] == "synonym":
|
||||
all_title_info["other"].append(title.string)
|
||||
else:
|
||||
break
|
||||
return all_title_info
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
json = []
|
||||
for i in [0, 1, 2]:
|
||||
ids = get_list(2022, i)
|
||||
for id in ids:
|
||||
data = get_title(id)
|
||||
print(data)
|
||||
time.sleep(2.5)
|
||||
json.append(data)
|
||||
json_config.save("season_winter.json", json)
|
||||
10000
resource/names.txt
10000
resource/names.txt
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user