# Provenance: patch 2e0c74db805739e0f028a4798e1d5ace25f9d021, "ParserLV2 update"
# (EstrellaXD, 2022-06-03), new file
# AutoBangumi/app/bangumi_parser/analyser/rss_parser.py
"""Second-level parser that extracts title, season and episode from RSS item names."""
import logging
import re
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Only needed for the annotation on ``run``; the runtime import happens
    # inside ``run`` so the pure string helpers work without the package on
    # ``sys.path``.
    from bangumi_parser.episode import Episode

logger = logging.getLogger(__name__)


class ParserLV2:
    """Split a raw release name into ``(title, season, episode)``.

    The pipeline is ``process`` -> ``pre_process`` -> ``season_process`` ->
    ``name_process``; ``run`` wraps the result in an ``Episode`` object.
    """

    # Tokens recognised as a season marker: "S2", "Season 2", "第二季", "第2期".
    _SEASON_RULE = r"S\d{1,2}|Season \d{1,2}|[第].[季期]"

    # Chinese numerals that may appear between 第 and 季/期.
    _SEASON_MAP = {
        "一": 1,
        "二": 2,
        "三": 3,
        "四": 4,
        "五": 5,
        "六": 6,
        "七": 7,
        "八": 8,
        "九": 9,
        "十": 10,
    }

    # group(1): everything before the episode marker, group(2): the episode
    # marker itself, group(3): everything after it.
    _EPISODE_RULE = re.compile(
        r"(.*|\[.*])( -? \d{1,3} |\[\d{1,3}]|\[\d{1,3}.?[vV]\d{1}]|[第第]\d{1,3}[话話集集]|\[\d{1,3}.?END])(.*)"
    )

    def __init__(self) -> None:
        self.name = None

    def pre_process(self, raw_name):
        """Drop the leading group / "新番" tags from the part before the episode.

        If the text contains "新番", everything up to and including the
        character that follows the last "新番" is removed. Otherwise the first
        bracketed tag (up to the first ``]`` or ``】``) is removed and the
        result is stripped.
        """
        # The original condition also fired on any lone "番", for which the
        # "新番" substitution is a no-op and the group tag was left in place.
        if "新番" in raw_name:
            return re.sub(".*新番.", "", raw_name)
        return re.sub(r"^[^]】]*[]】]", "", raw_name).strip()

    @classmethod
    def _season_number(cls, token):
        """Convert one season token ("S02", "Season 3", "第二季") to an int.

        Raises:
            ValueError: the token holds neither digits nor a known numeral.
        """
        # Pull the digits out directly: stripping r"S|Season" from "Season 2"
        # leaves "eason 2" because the alternation tries "S" first.
        digits = re.search(r"\d+", token)
        if digits is not None:
            return int(digits.group())
        numeral = re.sub(r"[第季期 ]", "", token)
        try:
            return cls._SEASON_MAP[numeral]
        except KeyError:
            raise ValueError(f"unknown season token: {token!r}") from None

    def season_process(self, name_season):
        """Separate the season marker from the title text.

        Square brackets are replaced by spaces first. Returns ``(name, season)``
        where ``name`` has every season token removed and ``season`` comes from
        the first token found, or is ``1`` when there is none.

        Raises:
            ValueError: a season token is present but cannot be converted.
        """
        name_season = re.sub(r"[\[\]]", " ", name_season)
        seasons = re.findall(self._SEASON_RULE, name_season)
        if not seasons:
            return name_season, 1
        name = re.sub(self._SEASON_RULE, "", name_season)
        return name, self._season_number(seasons[0])

    def name_process(self, name):
        """Pick one title out of a string that may hold several translations.

        The text is split on "/", two spaces, or "-" followed by two spaces;
        if that yields a single piece, "_" and then " - " are tried. A lone
        piece shaped like "<non-latin> <latin>" returns the latin part.
        Otherwise the first piece containing four consecutive characters in
        the ``\\x00-\\xff`` range is returned, falling back to the first piece.

        Raises:
            ValueError: nothing is left after splitting.
        """
        cleaned = name.replace("(仅限港澳台地区)", "")
        # Written as " {2}" so the two-space separator is explicit. Splitting
        # on a single space would make the " - " and "<CJK> <latin>" branches
        # below unreachable and break multi-word titles apart.
        parts = [part for part in re.split(r"/| {2}|- {2}", cleaned) if part]
        if not parts:
            raise ValueError(f"no title text in {name!r}")
        if len(parts) == 1 and "_" in parts[0]:
            parts = parts[0].split("_")
        if len(parts) == 1 and " - " in parts[0]:
            parts = parts[0].split("-")
        if len(parts) == 1:
            match_obj = re.match(r"([^\x00-\xff]{1,}) ([\x00-\xff]{4,})", parts[0])
            if match_obj is not None:
                return match_obj.group(2)
        for candidate in parts:
            if re.search(r"[\x00-\xff]{4}", candidate.strip()) is not None:
                return candidate
        return parts[0]

    def process(self, raw_name):
        """Parse ``raw_name`` into ``(name, season, episode)``.

        Raises:
            ValueError: no episode marker is found, or the season/title parts
                cannot be interpreted.
        """
        raw_name = raw_name.replace("【", "[").replace("】", "]")
        match_obj = self._EPISODE_RULE.match(raw_name)
        if match_obj is None:
            raise ValueError(f"no episode marker in {raw_name!r}")
        name_season = self.pre_process(match_obj.group(1))
        name, season = self.season_process(name_season)
        name = self.name_process(name).strip()
        episode = int(re.findall(r"\d{1,3}", match_obj.group(2))[0])
        return name, season, episode

    def run(self, raw) -> "Episode | None":
        """Parse ``raw`` and return a populated ``Episode``.

        Returns ``None`` and logs a warning when the name cannot be parsed.
        """
        from bangumi_parser.episode import Episode

        try:
            name, season, episode = self.process(raw)
        except Exception:
            # Best-effort boundary: an unparseable title must not stop the
            # caller, but KeyboardInterrupt/SystemExit still propagate.
            logger.warning("ERROR match %s", raw)
            return None
        info = Episode()
        # NOTE(review): Episode also exposes ``title_info``; confirm whether
        # the title belongs there instead of this ad-hoc attribute.
        info.title = name
        info.season_info.number = season
        # ``EpisodeInfo`` is the nested class; the instance lives in
        # ``number_info``.
        info.number_info.number = episode
        return info


if __name__ == "__main__":
    import os
    import sys

    sys.path.append(os.path.dirname(".."))
    from const import BCOLORS

    # Names matching this rule are expected failures (batches, OVAs, movies,
    # long-running shows) and are not counted as errors.
    expected_failure = re.compile(
        r"\d{1,3}[-~]\d{1,3}|OVA|BD|電影|剧场版|老番|冷番|OAD|合集|劇場版|柯南|海賊王|蜡笔小新|整理|樱桃小丸子"
    )
    parser = ParserLV2()
    err_count = 0
    with open("bangumi_parser/names.txt", "r", encoding="utf-8") as f:
        for line in f:
            # Lines read from a file keep their "\n", so compare after strip.
            name = line.strip()
            if not name:
                continue
            try:
                print(name)
                title, season, episode = parser.process(name)
                print(title)
                print(season)
                print(episode)
            except Exception:
                if expected_failure.search(name) is None:
                    print(f"{BCOLORS._(BCOLORS.HEADER, name)}")
                    err_count += 1
    print(BCOLORS._(BCOLORS.WARNING, err_count))
a/AutoBangumi/app/bangumi_parser/analyser/simple_analyser.py b/AutoBangumi/app/bangumi_parser/analyser/simple_analyser.py index 3d8b01a8..44678e55 100644 --- a/AutoBangumi/app/bangumi_parser/analyser/simple_analyser.py +++ b/AutoBangumi/app/bangumi_parser/analyser/simple_analyser.py @@ -64,7 +64,7 @@ class SimpleAnalyser: if flag: break if not flag: - logger.debug("ERROR Not match with {name}") + logger.debug(f"ERROR Not match with {name}") return match_title_season = re.match(MatchRule.season_match, title, re.I) if match_title_season is not None: diff --git a/AutoBangumi/app/bangumi_parser/episode.py b/AutoBangumi/app/bangumi_parser/episode.py index 90b86ca2..d56e2ed6 100644 --- a/AutoBangumi/app/bangumi_parser/episode.py +++ b/AutoBangumi/app/bangumi_parser/episode.py @@ -16,7 +16,7 @@ class Episode: self.number: int = None @dataclass - class NumberInfo: + class EpisodeInfo: def __init__(self) -> None: self.raw: str = None self.number: int = None @@ -33,6 +33,6 @@ class Episode: self.group: str = None self.title_info = Episode.TitleInfo() self.season_info = Episode.SeasonInfo() - self.number_info = Episode.NumberInfo() + self.number_info = Episode.EpisodeInfo() self.format: str = None self.subtitle: str = None diff --git a/AutoBangumi/app/const.py b/AutoBangumi/app/const.py index adf57ed5..cc36c4f9 100644 --- a/AutoBangumi/app/const.py +++ b/AutoBangumi/app/const.py @@ -4,7 +4,7 @@ from math import fabs DEFAULT_SETTINGS = { - "host_ip": "localhost:8080", + "host_ip": "192.168.31.10:10101", "sleep_time": 1800, "user_name": "admin", "password": "adminadmin", diff --git a/AutoBangumi/app/const_dev.py b/AutoBangumi/app/const_dev.py index 250217fb..be374ae5 100644 --- a/AutoBangumi/app/const_dev.py +++ b/AutoBangumi/app/const_dev.py @@ -1,5 +1,8 @@ DEV_SETTINGS = { - "host_ip": "qb.findix.cn", + "host_ip": "192.168.31.10:10101", + "user_name": "admin", + "password": "adminadmin", + "rss_link": "https://mikanani.me/RSS/classic", "sleep_time": 10, "info_path": 
"../config/bangumi.json", "rule_path": "../config/rule.json", diff --git a/AutoBangumi/app/core/rss_collector.py b/AutoBangumi/app/core/rss_collector.py index 3f9a5aec..c1765f67 100644 --- a/AutoBangumi/app/core/rss_collector.py +++ b/AutoBangumi/app/core/rss_collector.py @@ -42,4 +42,4 @@ class RSSCollector: "added": False, } ) - logger.debug("add {json_title} {json_season}") + logger.debug(f"add {title} {season}") diff --git a/README.md b/README.md index 30e2a881..21407316 100644 --- a/README.md +++ b/README.md @@ -94,7 +94,7 @@ docker run -d \ -e METHOD=pn \ #optional -e GROUP_TAG=True \ #optional -e DOWNLOAD_PATH=/path/downloads \ - -e NOT_COTAIN=720 + -e NOT_COTAIN=720 \ -e RSS= \ --network=host \ --dns=8.8.8.8 \