ParserLV2 update

This commit is contained in:
EstrellaXD
2022-06-03 17:57:42 +08:00
parent c9060ebd39
commit 2e0c74db80
7 changed files with 123 additions and 7 deletions

View File

@@ -0,0 +1,113 @@
import logging
import re
from bangumi_parser.episode import Episode
logger = logging.getLogger(__name__)
class ParserLV2:
    """Extract the title, season number and episode number from a raw release name.

    The raw name is split around its episode marker; the part in front of the
    marker is cleaned of its leading group tag, scanned for a season token and
    finally reduced to a single title.
    """

    # Tokens that announce a season: "S2", "Season 2", "第二季", "第2期", ...
    _SEASON_RULE = r"S\d{1,2}|Season \d{1,2}|[第].[季期]"

    # Chinese numerals that may appear between 第 and 季/期.
    _SEASON_MAP = {
        "一": 1,
        "二": 2,
        "三": 3,
        "四": 4,
        "五": 5,
        "六": 6,
        "七": 7,
        "八": 8,
        "九": 9,
        "十": 10,
    }

    # group(1): everything before the episode marker, group(2): the marker
    # itself (" - 01 ", "[01]", "[01v2]", "第01话", "[12 END]"), group(3): rest.
    _EPISODE_RULE = (
        r"(.*|\[.*])"
        r"( -? \d{1,3} |\[\d{1,3}]|\[\d{1,3}.?[vV]\d{1}]"
        r"|第\d{1,3}[话話集]|\[\d{1,3}.?END])"
        r"(.*)"
    )

    def __init__(self) -> None:
        self.name = None

    def pre_process(self, raw_name: str) -> str:
        """Drop the leading group tag (and any "新番"/"月番" marker) from *raw_name*.

        When a "新番" or "月番" marker is present, everything up to and including
        the character that follows the marker is removed. Otherwise only the
        text up to the first closing bracket is removed.
        """
        if re.search(r"新番|月番", raw_name):
            pro_name = re.sub(r".*(?:新番|月番).", "", raw_name)
        else:
            pro_name = re.sub(r"^[^]】]*[]】]", "", raw_name)
        return pro_name.strip()

    def _season_number(self, token: str) -> int:
        """Convert one season token matched by ``_SEASON_RULE`` to an int.

        Raises:
            KeyError: the token carries neither digits nor a known numeral.
        """
        digits = re.search(r"\d+", token)
        if digits is not None:
            return int(digits.group())
        numeral = re.sub(r"[第季期 ]", "", token)
        return self._SEASON_MAP[numeral]

    def season_process(self, name_season: str) -> "tuple[str, int]":
        """Split *name_season* into ``(name_without_season_tokens, season)``.

        Square brackets are replaced by spaces first. The season defaults to 1
        when no season token is found; when several are found the first one
        decides the number, and all of them are removed from the name.
        """
        name_season = re.sub(r"[\[\]]", " ", name_season)
        seasons = re.findall(self._SEASON_RULE, name_season)
        if not seasons:
            return name_season, 1
        name = re.sub(self._SEASON_RULE, "", name_season)
        return name, self._season_number(seasons[0])

    def name_process(self, name: str) -> str:
        """Pick a single title out of a string that may hold several variants.

        Variants are separated by "/", by runs of two or more spaces (which is
        what removed brackets leave behind), by "_" or by " - ". The first
        variant containing at least four Latin-1 characters in a row is
        preferred; otherwise the first variant is returned. Returns "" when
        nothing is left after splitting.
        """
        cleaned = name.replace("(仅限港澳台地区)", "")
        parts = [part for part in re.split(r"/| {2,}|- {2,}", cleaned) if part]
        if not parts:
            return ""
        if len(parts) == 1 and "_" in parts[0]:
            parts = parts[0].split("_")
        if len(parts) == 1 and " - " in parts[0]:
            # Split on the spaced dash only, so hyphenated words stay intact.
            parts = parts[0].split(" - ")
        if len(parts) == 1:
            # "<non-Latin title> <Latin title>": keep the Latin half.
            bilingual = re.match(
                r"([^\x00-\xff]{1,}) ([\x00-\xff]{4,})", parts[0].strip()
            )
            if bilingual is not None:
                return bilingual.group(2)
        for part in parts:
            if re.search(r"[\x00-\xff]{4}", part.strip()) is not None:
                return part
        return parts[0]

    def process(self, raw_name: str) -> "tuple[str, int, int]":
        """Parse *raw_name* into ``(title, season, episode)``.

        Raises:
            ValueError: no episode marker is found, or no title remains.
            KeyError: the season token uses an unknown numeral.
        """
        # Normalise full-width brackets so one set of patterns covers both.
        normalized = raw_name.replace("【", "[").replace("】", "]")
        match_obj = re.match(self._EPISODE_RULE, normalized)
        if match_obj is None:
            raise ValueError(f"no episode marker found in {raw_name!r}")
        name_season = self.pre_process(match_obj.group(1))
        name, season = self.season_process(name_season)
        name = self.name_process(name).strip()
        if not name:
            raise ValueError(f"no title found in {raw_name!r}")
        episode = int(re.search(r"\d{1,3}", match_obj.group(2)).group())
        return name, season, episode

    def run(self, raw) -> "Episode | None":
        """Parse *raw* into an ``Episode``; log a warning and return None on failure."""
        try:
            name, season, episode = self.process(raw)
        except Exception:
            logger.warning(f"ERROR match {raw}")
            return None
        info = Episode()
        # NOTE(review): Episode also carries a ``title_info`` object; confirm
        # whether the parsed title should be stored there instead.
        info.title = name
        info.season_info.number = season
        # Write to the instance's number_info, not the nested EpisodeInfo class.
        info.number_info.number = episode
        return info
if __name__ == "__main__":
    # Developer smoke test: parse every name listed in bangumi_parser/names.txt,
    # print the result, and report how many names could not be parsed.
    import os
    import sys

    # NOTE(review): os.path.dirname("..") evaluates to "", so this appends an
    # empty entry (the current working directory) to sys.path — confirm that
    # this is where const.py is meant to be found.
    sys.path.append(os.path.dirname(".."))
    from const import BCOLORS

    # Failures on names matching this pattern are neither printed nor counted.
    ignored_failures = re.compile(
        r"\d{1,3}[-~]\d{1,3}|OVA|BD|電影|剧场版|老番|冷番|OAD|合集|劇場版|柯南|海賊王|蜡笔小新|整理|樱桃小丸子"
    )

    parser = ParserLV2()
    err_count = 0
    with open("bangumi_parser/names.txt", "r", encoding="utf-8") as f:
        for line in f:
            # Lines read from a file keep their newline, so strip before the
            # emptiness check; otherwise blank lines are parsed and counted.
            name = line.strip()
            if not name:
                continue
            print(name)
            try:
                title, season, episode = parser.process(name)
            except Exception:
                if ignored_failures.search(name) is None:
                    print(f"{BCOLORS._(BCOLORS.HEADER, name)}")
                    err_count += 1
                continue
            print(title)
            print(season)
            print(episode)
    print(BCOLORS._(BCOLORS.WARNING, err_count))

View File

@@ -64,7 +64,7 @@ class SimpleAnalyser:
if flag:
break
if not flag:
logger.debug("ERROR Not match with {name}")
logger.debug(f"ERROR Not match with {name}")
return
match_title_season = re.match(MatchRule.season_match, title, re.I)
if match_title_season is not None:

View File

@@ -16,7 +16,7 @@ class Episode:
self.number: int = None
@dataclass
class NumberInfo:
class EpisodeInfo:
def __init__(self) -> None:
self.raw: str = None
self.number: int = None
@@ -33,6 +33,6 @@ class Episode:
self.group: str = None
self.title_info = Episode.TitleInfo()
self.season_info = Episode.SeasonInfo()
self.number_info = Episode.NumberInfo()
self.number_info = Episode.EpisodeInfo()
self.format: str = None
self.subtitle: str = None

View File

@@ -4,7 +4,7 @@ from math import fabs
DEFAULT_SETTINGS = {
"host_ip": "localhost:8080",
"host_ip": "192.168.31.10:10101",
"sleep_time": 1800,
"user_name": "admin",
"password": "adminadmin",

View File

@@ -1,5 +1,8 @@
DEV_SETTINGS = {
"host_ip": "qb.findix.cn",
"host_ip": "192.168.31.10:10101",
"user_name": "admin",
"password": "adminadmin",
"rss_link": "https://mikanani.me/RSS/classic",
"sleep_time": 10,
"info_path": "../config/bangumi.json",
"rule_path": "../config/rule.json",

View File

@@ -42,4 +42,4 @@ class RSSCollector:
"added": False,
}
)
logger.debug("add {json_title} {json_season}")
logger.debug(f"add {title} {season}")

View File

@@ -94,7 +94,7 @@ docker run -d \
-e METHOD=pn \ #optional
-e GROUP_TAG=True \ #optional
-e DOWNLOAD_PATH=/path/downloads \
-e NOT_COTAIN=720
-e NOT_COTAIN=720 \
-e RSS=<YOUR_RSS_ADDRESS> \
--network=host \
--dns=8.8.8.8 \