mirror of
https://github.com/EstrellaXD/Auto_Bangumi.git
synced 2026-04-13 18:11:03 +08:00
2.5.0-pre3
This commit is contained in:
@@ -3,6 +3,8 @@ import os
|
||||
import time
|
||||
import logging
|
||||
|
||||
from multiprocessing import Process
|
||||
|
||||
from conf import settings
|
||||
from argument_parser import parse
|
||||
from log import setup_logger
|
||||
|
||||
@@ -11,21 +11,24 @@ class ParserLV2:
|
||||
def __init__(self) -> None:
|
||||
self._info = Episode()
|
||||
|
||||
def pre_process(self, raw_name):
|
||||
@staticmethod
|
||||
def pre_process(raw_name):
|
||||
pro_name = raw_name.replace("【", "[").replace("】", "]")
|
||||
return pro_name
|
||||
|
||||
def get_group(self, name):
|
||||
self._info.group = re.split(r"[\[\]]", name)[1]
|
||||
|
||||
def second_process(self, raw_name):
|
||||
@staticmethod
|
||||
def second_process(raw_name):
|
||||
if re.search(r"新番|月?番", raw_name):
|
||||
pro_name = re.sub(".*新番.", "", raw_name)
|
||||
else:
|
||||
pro_name = re.sub(r"^[^]】]*[]】]", "", raw_name).strip()
|
||||
return pro_name
|
||||
|
||||
def season_process(self, name_season):
|
||||
@staticmethod
|
||||
def season_process(name_season):
|
||||
season_rule = r"S\d{1,2}|Season \d{1,2}|[第].[季期]"
|
||||
season_map = {
|
||||
"一": 1,
|
||||
@@ -61,7 +64,8 @@ class ParserLV2:
|
||||
break
|
||||
return name, season_number, season_raw
|
||||
|
||||
def name_process(self, name):
|
||||
@staticmethod
|
||||
def name_process(name):
|
||||
name = name.strip()
|
||||
split = re.split("/| |- ", name.replace("(仅限港澳台地区)", ""))
|
||||
while "" in split:
|
||||
@@ -74,7 +78,7 @@ class ParserLV2:
|
||||
if len(split) == 1:
|
||||
match_obj = re.match(r"([^\x00-\xff]{1,})(\s)([\x00-\xff]{4,})", name)
|
||||
if match_obj is not None:
|
||||
return match_obj.group(3)
|
||||
return match_obj.group(3), split
|
||||
compare = 0
|
||||
for name in split:
|
||||
l = re.findall("[aA-zZ]{1}", name).__len__()
|
||||
@@ -82,9 +86,10 @@ class ParserLV2:
|
||||
compare = l
|
||||
for name in split:
|
||||
if re.findall("[aA-zZ]{1}", name).__len__() == compare:
|
||||
return name
|
||||
return name.strip(), split
|
||||
|
||||
def find_tags(self, other):
|
||||
@staticmethod
|
||||
def find_tags(other):
|
||||
elements = re.sub(r"[\[\]()()]", " ", other).split(" ")
|
||||
while "" in elements:
|
||||
elements.remove("")
|
||||
@@ -110,50 +115,24 @@ class ParserLV2:
|
||||
)
|
||||
name_season = self.second_process(match_obj.group(1))
|
||||
name, season_number, season_raw = self.season_process(name_season)
|
||||
name = self.name_process(name).strip()
|
||||
name, name_group = self.name_process(name)
|
||||
episode = int(re.findall(r"\d{1,3}", match_obj.group(2))[0])
|
||||
other = match_obj.group(3).strip()
|
||||
sub, dpi, source= self.find_tags(other)
|
||||
return name, season_number, season_raw, episode, sub, dpi, source
|
||||
return name, season_number, season_raw, episode, sub, dpi, source, name_group
|
||||
|
||||
def analyse(self, raw) -> Episode:
    """Parse a raw release title into this parser's ``Episode`` record.

    The eight values returned by ``self.process(raw)`` are stored on
    ``self._info`` (title, season number, raw season text, episode number,
    subtitle, dpi, source and the title's name group).

    Returns:
        ``self._info`` on success, or ``None`` after logging a warning when
        the title could not be parsed.
    """
    try:
        (
            self._info.title,
            self._info.season_info.number,
            self._info.season_info.raw,
            self._info.ep_info.number,
            self._info.subtitle,
            self._info.dpi,
            self._info.source,
            self._info.title_info.group,
        ) = self.process(raw)
    except Exception:
        # Parsing stays best-effort, but a bare ``except:`` would also
        # swallow KeyboardInterrupt and SystemExit.
        logger.warning(f"ERROR match {raw}")
        return None
    return self._info
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import sys, os
|
||||
|
||||
sys.path.append(os.path.dirname(".."))
|
||||
from const import BCOLORS
|
||||
from bangumi_parser.episode import Episode
|
||||
|
||||
parser = ParserLV2()
|
||||
with (open("bangumi_parser/names.txt", "r", encoding="utf-8") as f):
|
||||
err_count = 0
|
||||
for name in f:
|
||||
if name != "":
|
||||
try:
|
||||
# parser.get_group(name)
|
||||
title, season, episode = parser.analyse(name)
|
||||
# print(name)
|
||||
# print(title)
|
||||
# print(season)
|
||||
# print(episode)
|
||||
except:
|
||||
if (
|
||||
re.search(
|
||||
r"\d{1,3}[-~]\d{1,3}|OVA|BD|電影|剧场版|老番|冷番|OAD|合集|劇場版|柯南|海賊王|蜡笔小新|整理|樱桃小丸子",
|
||||
name,
|
||||
)
|
||||
is None
|
||||
):
|
||||
print(f"{BCOLORS._(BCOLORS.HEADER, name)}")
|
||||
err_count += 1
|
||||
print(BCOLORS._(BCOLORS.WARNING, err_count))
|
||||
test = ParserLV2()
|
||||
ep = test.analyse("【幻樱字幕组】【4月新番】【古见同学有交流障碍症 Komi-san wa, Komyushou Desu.】【22】【GB_MP4】【1920X1080】")
|
||||
print(ep.title)
|
||||
|
||||
@@ -9,6 +9,7 @@ class Episode:
|
||||
self.raw: str = None
|
||||
self.name: str = None
|
||||
self.official: str = None
|
||||
self.group: list = None
|
||||
|
||||
@dataclass
|
||||
class SeasonInfo:
|
||||
|
||||
@@ -1,30 +1,39 @@
|
||||
from thefuzz import fuzz
|
||||
import logging
|
||||
from utils import json_config
|
||||
from conf import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FuzzMatch:
|
||||
def __init__(self):
    """Load the title database used for fuzzy matching.

    Downloads the data from ``settings.anidb_url`` and caches it at
    ``settings.anidb_path``. If that step raises, the previously cached
    copy is read back from ``settings.anidb_path`` instead. The result is
    stored in ``self.match_data``.
    """
    try:
        anidb_data = json_config.get(settings.anidb_url)
        json_config.save(settings.anidb_path, anidb_data)
    except Exception as e:
        logger.debug(e)
        logger.info("Fail to get anidb data, reading local data")
        # The cache is written to ``anidb_path`` above, so the fallback
        # has to read that same path.
        anidb_data = json_config.load(settings.anidb_path)
    self.match_data = anidb_data
|
||||
|
||||
@staticmethod
def match(title_raw, info: dict):
    """Return the best fuzzy score between ``title_raw`` and one entry.

    Args:
        title_raw: Title text to look up; compared case-insensitively.
        info: One database entry. Its ``"main"``, ``"en"``, ``"ja"``,
            ``"zh-Hans"`` and ``"zh-Hant"`` values are each either a
            string or ``None``; ``"other"`` is an iterable of strings.

    Returns:
        The highest ``fuzz.token_sort_ratio`` over every available title,
        or ``0`` when the entry has no title to compare against.
    """
    needle = title_raw.lower()
    compare_value = [
        fuzz.token_sort_ratio(needle, info[tag].lower())
        for tag in ["main", "en", "ja", "zh-Hans", "zh-Hant"]
        if info[tag] is not None
    ]
    compare_value.extend(
        fuzz.token_sort_ratio(needle, compare.lower())
        for compare in info["other"]
    )
    # An entry without any usable title must not abort the whole scan
    # with ``ValueError: max() arg is an empty sequence``.
    return max(compare_value, default=0)
|
||||
|
||||
def find_max_name(self, title):
|
||||
def find_max_name(self, title_raw):
|
||||
max_value = 0
|
||||
max_info = None
|
||||
for info in self.match_data:
|
||||
a = self.match(title, info)
|
||||
a = self.match(title_raw, info)
|
||||
if a > max_value:
|
||||
max_value = a
|
||||
max_info = info
|
||||
@@ -33,6 +42,11 @@ class FuzzMatch:
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from const_dev import DEV_SETTINGS
|
||||
settings.init(DEV_SETTINGS)
|
||||
f = FuzzMatch()
|
||||
value, title = f.find_max_name("辉夜大小姐想让我告白")
|
||||
print(value,title)
|
||||
name = "勇者、辞职不干了"
|
||||
value, title = f.find_max_name(name)
|
||||
print(f"Raw Name: {name} \n"
|
||||
f"Match Name: {title} \n"
|
||||
f"Match Value: {value}")
|
||||
|
||||
@@ -9,6 +9,8 @@ DEFAULT_SETTINGS = {
|
||||
"method": "pn",
|
||||
"enable_group_tag": False,
|
||||
"info_path": "/config/bangumi.json",
|
||||
"anidb_path": "/config/anidb.json",
|
||||
"anidb_url": "https://raw.githubusercontent.com/EstrellaXD/Auto_Bangumi_resourse/main/resource/season_data.json",
|
||||
"not_contain": "720",
|
||||
"rule_name_re": r"\:|\/|\.",
|
||||
"connect_retry_interval": 5,
|
||||
@@ -16,10 +18,11 @@ DEFAULT_SETTINGS = {
|
||||
"season_one_tag": True,
|
||||
"remove_bad_torrent": False,
|
||||
"dev_debug": False,
|
||||
"data_version": 3.1,
|
||||
"data_version": 4.0,
|
||||
"enable_eps_complete": False,
|
||||
"first_sleep": 600,
|
||||
"webui_port": 7892
|
||||
"webui_port": 7892,
|
||||
"enable_fuzz_match": True
|
||||
}
|
||||
|
||||
ENV_TO_ATTR = {
|
||||
@@ -40,7 +43,8 @@ ENV_TO_ATTR = {
|
||||
"AB_SEASON_ONE": ("season_one_tag", lambda e: e.lower() in ("true", "1", "t")),
|
||||
"AB_REMOVE_BAD_BT": ("remove_bad_torrent", lambda e: e.lower() in ("true", "1", "t")),
|
||||
"AB_FIRST_SLEEP": ("first_sleep", lambda e: float(e)),
|
||||
"AB_WEBUI_PORT": ("webui_port", lambda e: int(e))
|
||||
"AB_WEBUI_PORT": ("webui_port", lambda e: int(e)),
|
||||
"AB_FUZZ_MATCH": ("enable_fuzz_match", lambda e: e.lower() in ("true", "1", "t"))
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ class RSSCollector:
|
||||
# logger.exception(e)
|
||||
logger.error("ERROR with DNS/Connection.")
|
||||
|
||||
def title_parser(self, title):
|
||||
def title_parser(self, title, fuzz_match=True):
|
||||
episode = self._simple_analyser.analyse(title)
|
||||
if episode:
|
||||
group, title_raw, season, ep = episode.group, episode.title, episode.season_info, episode.ep_info
|
||||
@@ -34,9 +34,13 @@ class RSSCollector:
|
||||
download_past = True
|
||||
else:
|
||||
download_past = False
|
||||
match_value, title_official = self._fuzz_match.find_max_name(title_raw)
|
||||
if fuzz_match:
|
||||
match_value, title_official = self._fuzz_match.find_max_name(title_raw)
|
||||
else:
|
||||
match_value, title_official = 0, None
|
||||
title_official = title_official if match_value > 55 else title_raw
|
||||
data = {
|
||||
"title": title_official if match_value > 55 else title_raw,
|
||||
"title": title_official,
|
||||
"title_raw": title_raw,
|
||||
"season": season.raw,
|
||||
"group": group,
|
||||
@@ -46,31 +50,39 @@ class RSSCollector:
|
||||
"added": False,
|
||||
"download_past": download_past
|
||||
}
|
||||
return episode, data
|
||||
return episode, data, title_official
|
||||
|
||||
def collect(self, bangumi_data):
|
||||
rss = self.get_rss_info(settings.rss_link)
|
||||
items = rss.find_all("item")
|
||||
for item in items:
|
||||
add = True
|
||||
name = item.title.string
|
||||
# debug 用
|
||||
if settings.debug_mode:
|
||||
logger.debug(f"Raw {name}")
|
||||
episode, data = self.title_parser(name)
|
||||
episode, data, title_official = self.title_parser(name)
|
||||
for d in bangumi_data["bangumi_info"]:
|
||||
if d["title"] == episode.title:
|
||||
if d["title"] == title_official:
|
||||
add = False
|
||||
break
|
||||
if add:
|
||||
if settings.debug_mode:
|
||||
logger.debug(f"Raw {name}")
|
||||
bangumi_data["bangumi_info"].append(data)
|
||||
logger.info(f"Adding {episode.title} Season {episode.season_info.number}")
|
||||
logger.info(f"Adding {title_official} Season {episode.season_info.number}")
|
||||
|
||||
def collect_collection(self, rss_link):
|
||||
rss = self.get_rss_info(rss_link)
|
||||
item = rss.find("item")
|
||||
title = item.title.string
|
||||
_, data = self.title_parser(title)
|
||||
_, data, _ = self.title_parser(title, fuzz_match=True)
|
||||
return data
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from const_dev import DEV_SETTINGS
|
||||
from utils import json_config
|
||||
settings.init(DEV_SETTINGS)
|
||||
rss = RSSCollector()
|
||||
data = rss.get_rss_info("https://mikanasni.me/RSS/Classic")
|
||||
info = json_config.load("/Users/Estrella/Developer/Bangumi_Auto_Collector/config/bangumi.json")
|
||||
rss.collect(info)
|
||||
print(info)
|
||||
|
||||
|
||||
4
auto_bangumi/run.sh
Executable file
4
auto_bangumi/run.sh
Executable file
@@ -0,0 +1,4 @@
|
||||
#!/bin/bash

# Launch app.py (with -d) as a background job, then replace this shell
# with the web.py process so it becomes the script's foreground process.
python3 app.py -d &
exec python3 web.py
|
||||
@@ -1,4 +1,5 @@
|
||||
import json
|
||||
import requests
|
||||
|
||||
|
||||
def load(filename):
|
||||
@@ -10,3 +11,8 @@ def save(filename, obj):
|
||||
with open(filename, "w", encoding="utf8") as f:
|
||||
json.dump(obj, f, indent=4, separators=(",", ": "), ensure_ascii=False)
|
||||
pass
|
||||
|
||||
|
||||
def get(url, timeout=10):
    """Fetch ``url`` over HTTP and return the response body parsed as JSON.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely.

    Raises:
        requests.RequestException: On connection errors, timeouts or a
            4xx/5xx response status.
        ValueError: If the response body is not valid JSON.
    """
    req = requests.get(url, timeout=timeout)
    # Fail on HTTP errors here instead of trying to parse an error page.
    req.raise_for_status()
    return req.json()
|
||||
78
auto_bangumi/web.py
Normal file
78
auto_bangumi/web.py
Normal file
@@ -0,0 +1,78 @@
|
||||
from typing import Union
|
||||
|
||||
import uvicorn
|
||||
from fastapi import FastAPI, Request
|
||||
from fastapi.responses import HTMLResponse
|
||||
from fastapi.templating import Jinja2Templates
|
||||
from pydantic import BaseModel
|
||||
import logging
|
||||
|
||||
from core.rss_collector import RSSCollector
|
||||
from core.download_client import DownloadClient
|
||||
from conf import settings
|
||||
from utils import json_config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
|
||||
templates = Jinja2Templates(directory="templates")
|
||||
|
||||
|
||||
@app.get("/", response_class=HTMLResponse)
def index(request: Request):
    """Render ``index.html`` as the response for the site root."""
    # The only value handed to the template is the incoming request.
    return templates.TemplateResponse("index.html", {"request": request})
|
||||
|
||||
|
||||
class Config(BaseModel):
    """Request body accepted by ``POST /api/v1/config``.

    Every field is required; the handler copies them into the JSON file it
    writes.
    """

    # String-valued settings.
    rss_link: str
    host: str
    user_name: str
    password: str
    download_path: str
    method: str

    enable_group_tag: bool
    not_contain: str

    # Boolean switches.
    debug_mode: bool
    season_one_tag: bool
    remove_bad_torrent: bool
    enable_eps_complete: bool
|
||||
|
||||
|
||||
@app.post("/api/v1/config")
async def config(config: Config):
    """Write the submitted settings to ``/config/config.json``.

    Returns:
        The literal string ``"received"`` once the file has been saved.
    """
    # (key written to the file, attribute read from the request body),
    # listed in the order the keys appear in the saved JSON.
    # NOTE(review): "season_one" is the only key that differs from its
    # attribute name (``season_one_tag``) - confirm that whatever reads
    # config.json expects this spelling.
    key_to_attr = (
        ("rss_link", "rss_link"),
        ("host", "host"),
        ("user_name", "user_name"),
        ("password", "password"),
        ("download_path", "download_path"),
        ("method", "method"),
        ("enable_group_tag", "enable_group_tag"),
        ("not_contain", "not_contain"),
        ("debug_mode", "debug_mode"),
        ("season_one", "season_one_tag"),
        ("remove_bad_torrent", "remove_bad_torrent"),
        ("enable_eps_complete", "enable_eps_complete"),
    )
    data = {key: getattr(config, attr) for key, attr in key_to_attr}
    json_config.save("/config/config.json", data)
    return "received"
|
||||
|
||||
|
||||
class RSS(BaseModel):
    """Request body accepted by ``POST /api/v1/subscriptions``."""

    # Feed address passed on to the collector and the download client.
    link: str
|
||||
|
||||
|
||||
@app.post("/api/v1/subscriptions")
def receive(link: RSS):
    """Subscribe to the feed in ``link`` and register it for download.

    Parses the feed through ``RSSCollector.collect_collection``, then hands
    the feed and the parsed data to a new ``DownloadClient``.

    Declared with plain ``def`` rather than ``async def``: the calls below
    are synchronous, so FastAPI should run the handler in its threadpool
    instead of on the event loop.

    Returns:
        The data produced by ``collect_collection`` for the feed.
    """
    rss_link = link.link
    data = RSSCollector().collect_collection(rss_link)
    client = DownloadClient()
    client.add_collection_feed(rss_link)
    client.add_rules(data, rss_link=rss_link)
    return data
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: serve the app on all interfaces at the
    # configured web UI port.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=settings.webui_port,
    )
|
||||
2637
config/anidb.json
Normal file
2637
config/anidb.json
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user