2.5.0-pre3

This commit is contained in:
EstrellaXD
2022-06-12 10:29:02 +08:00
parent ecbc80a558
commit 54068c50ea
10 changed files with 2802 additions and 65 deletions

View File

@@ -3,6 +3,8 @@ import os
import time
import logging
from multiprocessing import Process
from conf import settings
from argument_parser import parse
from log import setup_logger

View File

@@ -11,21 +11,24 @@ class ParserLV2:
def __init__(self) -> None:
self._info = Episode()
def pre_process(self, raw_name):
@staticmethod
def pre_process(raw_name):
pro_name = raw_name.replace("", "[").replace("", "]")
return pro_name
def get_group(self, name):
self._info.group = re.split(r"[\[\]]", name)[1]
def second_process(self, raw_name):
@staticmethod
def second_process(raw_name):
if re.search(r"新番|月?番", raw_name):
pro_name = re.sub(".*新番.", "", raw_name)
else:
pro_name = re.sub(r"^[^]】]*[]】]", "", raw_name).strip()
return pro_name
def season_process(self, name_season):
@staticmethod
def season_process(name_season):
season_rule = r"S\d{1,2}|Season \d{1,2}|[第].[季期]"
season_map = {
"": 1,
@@ -61,7 +64,8 @@ class ParserLV2:
break
return name, season_number, season_raw
def name_process(self, name):
@staticmethod
def name_process(name):
name = name.strip()
split = re.split("/| |- ", name.replace("(仅限港澳台地区)", ""))
while "" in split:
@@ -74,7 +78,7 @@ class ParserLV2:
if len(split) == 1:
match_obj = re.match(r"([^\x00-\xff]{1,})(\s)([\x00-\xff]{4,})", name)
if match_obj is not None:
return match_obj.group(3)
return match_obj.group(3), split
compare = 0
for name in split:
l = re.findall("[aA-zZ]{1}", name).__len__()
@@ -82,9 +86,10 @@ class ParserLV2:
compare = l
for name in split:
if re.findall("[aA-zZ]{1}", name).__len__() == compare:
return name
return name.strip(), split
def find_tags(self, other):
@staticmethod
def find_tags(other):
elements = re.sub(r"[\[\]()]", " ", other).split(" ")
while "" in elements:
elements.remove("")
@@ -110,50 +115,24 @@ class ParserLV2:
)
name_season = self.second_process(match_obj.group(1))
name, season_number, season_raw = self.season_process(name_season)
name = self.name_process(name).strip()
name, name_group = self.name_process(name)
episode = int(re.findall(r"\d{1,3}", match_obj.group(2))[0])
other = match_obj.group(3).strip()
sub, dpi, source= self.find_tags(other)
return name, season_number, season_raw, episode, sub, dpi, source
return name, season_number, season_raw, episode, sub, dpi, source, name_group
def analyse(self, raw) -> Episode:
try:
self._info.title, self._info.season_info.number,\
self._info.season_info.raw, self._info.ep_info.number,\
self._info.subtitle, self._info.dpi, self._info.source \
= self.process(raw)
self._info.subtitle, self._info.dpi, self._info.source, \
self._info.title_info.group = self.process(raw)
return self._info
except:
logger.warning(f"ERROR match {raw}")
if __name__ == "__main__":
import sys, os
sys.path.append(os.path.dirname(".."))
from const import BCOLORS
from bangumi_parser.episode import Episode
parser = ParserLV2()
with (open("bangumi_parser/names.txt", "r", encoding="utf-8") as f):
err_count = 0
for name in f:
if name != "":
try:
# parser.get_group(name)
title, season, episode = parser.analyse(name)
# print(name)
# print(title)
# print(season)
# print(episode)
except:
if (
re.search(
r"\d{1,3}[-~]\d{1,3}|OVA|BD|電影|剧场版|老番|冷番|OAD|合集|劇場版|柯南|海賊王|蜡笔小新|整理|樱桃小丸子",
name,
)
is None
):
print(f"{BCOLORS._(BCOLORS.HEADER, name)}")
err_count += 1
print(BCOLORS._(BCOLORS.WARNING, err_count))
test = ParserLV2()
ep = test.analyse("【幻樱字幕组】【4月新番】【古见同学有交流障碍症 Komi-san wa, Komyushou Desu.】【22】【GB_MP4】【1920X1080】")
print(ep.title)

View File

@@ -9,6 +9,7 @@ class Episode:
self.raw: str = None
self.name: str = None
self.official: str = None
self.group: list = None
@dataclass
class SeasonInfo:

View File

@@ -1,30 +1,39 @@
from thefuzz import fuzz
import logging
from utils import json_config
from conf import settings
logger = logging.getLogger(__name__)
class FuzzMatch:
def __init__(self):
self.match_data = json_config.load("/Users/Estrella/Developer/Bangumi_Auto_Collector/resource/season_data.json")
try:
anidb_data = json_config.get(settings.anidb_url)
json_config.save(settings.anidb_path, anidb_data)
except Exception as e:
logger.debug(e)
logger.info(f"Fail to get anidb data, reading local data")
anidb_data = json_config.load(settings.anidb_data)
self.match_data = anidb_data
def match(self, title, info: dict):
@staticmethod
def match(title_raw, info: dict):
compare_value = []
for type in ["main", "en", "ja", "zh-Hans", "zh-Hant"]:
if info[type] is not None:
a = fuzz.token_sort_ratio(title.lower(), info[type].lower())
for tag in ["main", "en", "ja", "zh-Hans", "zh-Hant"]:
if info[tag] is not None:
a = fuzz.token_sort_ratio(title_raw.lower(), info[tag].lower())
compare_value.append(a)
for compare in info["other"]:
a = fuzz.token_sort_ratio(title.lower(), compare.lower())
a = fuzz.token_sort_ratio(title_raw.lower(), compare.lower())
compare_value.append(a)
return max(compare_value)
def find_max_name(self, title):
def find_max_name(self, title_raw):
max_value = 0
max_info = None
for info in self.match_data:
a = self.match(title, info)
a = self.match(title_raw, info)
if a > max_value:
max_value = a
max_info = info
@@ -33,6 +42,11 @@ class FuzzMatch:
if __name__ == "__main__":
from const_dev import DEV_SETTINGS
settings.init(DEV_SETTINGS)
f = FuzzMatch()
value, title = f.find_max_name("辉夜大小姐想让我告白")
print(value,title)
name = "勇者、辞职不干了"
value, title = f.find_max_name(name)
print(f"Raw Name: {name} \n"
f"Match Name: {title} \n"
f"Match Value: {value}")

View File

@@ -9,6 +9,8 @@ DEFAULT_SETTINGS = {
"method": "pn",
"enable_group_tag": False,
"info_path": "/config/bangumi.json",
"anidb_path": "/config/anidb.json",
"anidb_url": "https://raw.githubusercontent.com/EstrellaXD/Auto_Bangumi_resourse/main/resource/season_data.json",
"not_contain": "720",
"rule_name_re": r"\:|\/|\.",
"connect_retry_interval": 5,
@@ -16,10 +18,11 @@ DEFAULT_SETTINGS = {
"season_one_tag": True,
"remove_bad_torrent": False,
"dev_debug": False,
"data_version": 3.1,
"data_version": 4.0,
"enable_eps_complete": False,
"first_sleep": 600,
"webui_port": 7892
"webui_port": 7892,
"enable_fuzz_match": True
}
ENV_TO_ATTR = {
@@ -40,7 +43,8 @@ ENV_TO_ATTR = {
"AB_SEASON_ONE": ("season_one_tag", lambda e: e.lower() in ("true", "1", "t")),
"AB_REMOVE_BAD_BT": ("remove_bad_torrent", lambda e: e.lower() in ("true", "1", "t")),
"AB_FIRST_SLEEP": ("first_sleep", lambda e: float(e)),
"AB_WEBUI_PORT": ("webui_port", lambda e: int(e))
"AB_WEBUI_PORT": ("webui_port", lambda e: int(e)),
"AB_FUZZ_MATCH": ("enable_fuzz_match", lambda e: e.lower() in ("true", "1", "t"))
}

View File

@@ -25,7 +25,7 @@ class RSSCollector:
# logger.exception(e)
logger.error("ERROR with DNS/Connection.")
def title_parser(self, title):
def title_parser(self, title, fuzz_match=True):
episode = self._simple_analyser.analyse(title)
if episode:
group, title_raw, season, ep = episode.group, episode.title, episode.season_info, episode.ep_info
@@ -34,9 +34,13 @@ class RSSCollector:
download_past = True
else:
download_past = False
match_value, title_official = self._fuzz_match.find_max_name(title_raw)
if fuzz_match:
match_value, title_official = self._fuzz_match.find_max_name(title_raw)
else:
match_value, title_official = 0, None
title_official = title_official if match_value > 55 else title_raw
data = {
"title": title_official if match_value > 55 else title_raw,
"title": title_official,
"title_raw": title_raw,
"season": season.raw,
"group": group,
@@ -46,31 +50,39 @@ class RSSCollector:
"added": False,
"download_past": download_past
}
return episode, data
return episode, data, title_official
def collect(self, bangumi_data):
rss = self.get_rss_info(settings.rss_link)
items = rss.find_all("item")
for item in items:
add = True
name = item.title.string
# debug 用
if settings.debug_mode:
logger.debug(f"Raw {name}")
episode, data = self.title_parser(name)
episode, data, title_official = self.title_parser(name)
for d in bangumi_data["bangumi_info"]:
if d["title"] == episode.title:
if d["title"] == title_official:
add = False
break
if add:
if settings.debug_mode:
logger.debug(f"Raw {name}")
bangumi_data["bangumi_info"].append(data)
logger.info(f"Adding {episode.title} Season {episode.season_info.number}")
logger.info(f"Adding {title_official} Season {episode.season_info.number}")
def collect_collection(self, rss_link):
rss = self.get_rss_info(rss_link)
item = rss.find("item")
title = item.title.string
_, data = self.title_parser(title)
_, data, _ = self.title_parser(title, fuzz_match=True)
return data
if __name__ == "__main__":
from const_dev import DEV_SETTINGS
from utils import json_config
settings.init(DEV_SETTINGS)
rss = RSSCollector()
data = rss.get_rss_info("https://mikanasni.me/RSS/Classic")
info = json_config.load("/Users/Estrella/Developer/Bangumi_Auto_Collector/config/bangumi.json")
rss.collect(info)
print(info)

4
auto_bangumi/run.sh Executable file
View File

@@ -0,0 +1,4 @@
#!/bin/bash
# Start the collector daemon in the background, then run the web UI in the
# foreground via exec so it becomes the main process and receives signals
# (important when this script is a container entrypoint).
#
# NOTE: the original `exec python3 app.py -d&` was a no-op `exec`: a command
# ending in `&` runs in a subshell, so `exec` there does not replace this
# shell and only obscures the intent.
python3 app.py -d &
exec python3 web.py

View File

@@ -1,4 +1,5 @@
import json
import requests
def load(filename):
@@ -10,3 +11,8 @@ def save(filename, obj):
with open(filename, "w", encoding="utf8") as f:
json.dump(obj, f, indent=4, separators=(",", ": "), ensure_ascii=False)
pass
def get(url, timeout=30):
    """Fetch *url* over HTTP and return its decoded JSON payload.

    Args:
        url: HTTP(S) endpoint that returns a JSON document.
        timeout: seconds before the request is aborted. Without an explicit
            timeout, ``requests`` can block indefinitely on an unresponsive
            host, which would hang the caller.

    Returns:
        The parsed JSON value (typically a dict or list).

    Raises:
        requests.HTTPError: on a non-2xx response.
        requests.RequestException: on connection failure or timeout.
        ValueError: if the response body is not valid JSON.
    """
    req = requests.get(url, timeout=timeout)
    # Fail loudly on an HTTP error instead of attempting to JSON-decode an
    # error page; callers already wrap this in try/except.
    req.raise_for_status()
    return req.json()

78
auto_bangumi/web.py Normal file
View File

@@ -0,0 +1,78 @@
from typing import Union
import uvicorn
from fastapi import FastAPI, Request
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates
from pydantic import BaseModel
import logging
from core.rss_collector import RSSCollector
from core.download_client import DownloadClient
from conf import settings
from utils import json_config
logger = logging.getLogger(__name__)
app = FastAPI()
templates = Jinja2Templates(directory="templates")
@app.get("/", response_class=HTMLResponse)
def index(request: Request):
    """Serve the web UI entry page (templates/index.html)."""
    # Jinja2Templates requires "request" to be present in the context.
    return templates.TemplateResponse("index.html", {"request": request})
class Config(BaseModel):
    """Request body for ``POST /api/v1/config``.

    Field names mirror the application's settings keys; the ``config``
    endpoint persists the submitted values to /config/config.json.
    """
    rss_link: str  # RSS feed URL the collector polls
    host: str  # download client host address
    user_name: str  # download client login
    password: str  # download client password
    download_path: str  # root save path for downloads
    method: str  # renaming method identifier (default settings use "pn")
    enable_group_tag: bool
    not_contain: str  # exclusion filter string (default settings use "720")
    debug_mode: bool
    season_one_tag: bool
    remove_bad_torrent: bool
    enable_eps_complete: bool
@app.post("/api/v1/config")
async def config(config: Config):
    """Persist UI-submitted settings to /config/config.json.

    The saved keys must match the application's settings attribute names
    so they can override the defaults when the config file is reloaded.

    Returns:
        The literal string "received" as an acknowledgement.
    """
    data = {
        "rss_link": config.rss_link,
        "host": config.host,
        "user_name": config.user_name,
        "password": config.password,
        "download_path": config.download_path,
        "method": config.method,
        "enable_group_tag": config.enable_group_tag,
        "not_contain": config.not_contain,
        "debug_mode": config.debug_mode,
        # BUG FIX: this key was written as "season_one", which matches no
        # settings attribute (the settings key is "season_one_tag", see
        # DEFAULT_SETTINGS / ENV_TO_ATTR), so the value never took effect.
        "season_one_tag": config.season_one_tag,
        "remove_bad_torrent": config.remove_bad_torrent,
        "enable_eps_complete": config.enable_eps_complete
    }
    json_config.save("/config/config.json", data)
    return "received"
class RSS(BaseModel):
    """Request body for ``POST /api/v1/subscriptions``."""
    link: str  # URL of the collection RSS feed to subscribe to
@app.post("/api/v1/subscriptions")
async def receive(link: RSS):
    """Register a collection RSS feed.

    Parses the feed with the RSS collector, subscribes the download client
    to it, adds download rules for the parsed data, and returns that data.
    """
    collector = RSSCollector()
    bangumi = collector.collect_collection(link.link)
    downloader = DownloadClient()
    downloader.add_collection_feed(link.link)
    downloader.add_rules(bangumi, rss_link=link.link)
    return bangumi
if __name__ == "__main__":
    # Run the FastAPI app with uvicorn, listening on all interfaces at the
    # configured web UI port (settings.webui_port, default 7892).
    uvicorn.run(app, host="0.0.0.0", port=settings.webui_port)

2637
config/anidb.json Normal file

File diff suppressed because it is too large Load Diff