mirror of
https://github.com/EstrellaXD/Auto_Bangumi.git
synced 2026-04-29 21:10:54 +08:00
2.5.11
- 重构目录 - 完成 repath 模块
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
import logging
|
||||
import re
|
||||
from parser.episode import Episode
|
||||
from dataclasses import dataclass
|
||||
|
||||
# from parser.episode import Episode
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -26,6 +28,20 @@ CHINESE_NUMBER_MAP = {
|
||||
}
|
||||
|
||||
|
||||
@dataclass
class Episode:
    """Flat record holding the values parsed out of one raw release name.

    The positional order of the fields is part of the interface: instances
    are built positionally, so fields must not be reordered.
    """

    # Title in its English, Chinese and Japanese form.
    title_en: str
    title_zh: str
    title_jp: str

    # Season number together with its raw textual form.
    season: int
    season_raw: str

    # Episode number.
    episode: int

    # Remaining release tags.
    sub: str
    group: str
    resolution: str
    source: str
|
||||
|
||||
|
||||
class RawParser:
|
||||
@staticmethod
|
||||
def get_group(name: str) -> str:
|
||||
@@ -63,6 +79,7 @@ class RawParser:
|
||||
|
||||
@staticmethod
|
||||
def name_process(name: str):
|
||||
name_en, name_zh, name_jp = "", "", ""
|
||||
name = name.strip()
|
||||
split = re.split("/|\s{2}|-\s{2}", name.replace("(仅限港澳台地区)", ""))
|
||||
while "" in split:
|
||||
@@ -73,17 +90,19 @@ class RawParser:
|
||||
elif re.search(" - {1}", name) is not None:
|
||||
split = re.split("-", name)
|
||||
if len(split) == 1:
|
||||
match_obj = re.match(
|
||||
r"([^\x00-\xff]{1,})(\s)([\x00-\xff]{4,})", name)
|
||||
if match_obj is not None:
|
||||
return match_obj.group(3), split
|
||||
compare, compare_idx = 0, 0
|
||||
for idx, name in list(enumerate(split)):
|
||||
l = re.findall("[aA-zZ]{1}", name).__len__()
|
||||
if l > compare:
|
||||
compare = l
|
||||
compare_idx = idx
|
||||
return split[compare_idx].strip(), split
|
||||
split_space = name.split(" ")
|
||||
for idx, item in enumerate(split_space):
|
||||
if re.search(r"^[\u4e00-\u9fa5]{2,}", item) is not None:
|
||||
split = [item.strip(), " ".join(split_space[idx+1:]).strip()]
|
||||
break
|
||||
for item in split:
|
||||
if re.search(r"[\u0800-\u4e00]{2,}", item):
|
||||
name_jp = item
|
||||
elif re.search(r"[\u4e00-\u9fa5]{2,}", item):
|
||||
name_zh = item
|
||||
elif re.search(r"[a-zA-Z]{3,}", item):
|
||||
name_en = item
|
||||
return name_en, name_zh, name_jp
|
||||
|
||||
@staticmethod
|
||||
def find_tags(other):
|
||||
@@ -115,9 +134,9 @@ class RawParser:
|
||||
lambda x: x.strip(), match_obj.groups()
|
||||
))
|
||||
raw_name, season_raw, season = self.season_process(season_info) # 处理 第n季
|
||||
name, name_group = "", ""
|
||||
name_en, name_zh, name_jp = "", "", ""
|
||||
try:
|
||||
name, name_group = self.name_process(raw_name) # 处理 名字
|
||||
name_en, name_zh, name_jp = self.name_process(raw_name) # 处理 名字
|
||||
except ValueError:
|
||||
pass
|
||||
# 处理 集数
|
||||
@@ -126,30 +145,23 @@ class RawParser:
|
||||
if raw_episode is not None:
|
||||
episode = int(raw_episode.group())
|
||||
sub, dpi, source = self.find_tags(other) # 剩余信息处理
|
||||
return name, season, season_raw, episode, sub, dpi, source, name_group, group
|
||||
return name_en, name_zh, name_jp, season, season_raw, episode, sub, dpi, source, group
|
||||
|
||||
def analyse(self, raw: str) -> Episode or None:
|
||||
try:
|
||||
ret = self.process(raw)
|
||||
if ret is None:
|
||||
return None
|
||||
name, season, sr, episode, \
|
||||
sub, dpi, source, ng, group = ret
|
||||
name_en, name_zh, name_jp, season, sr, episode, \
|
||||
sub, dpi, source, group = ret
|
||||
except Exception as e:
|
||||
logger.error(f"ERROR match {raw} {e}")
|
||||
return None
|
||||
info = Episode()
|
||||
info.title = name
|
||||
info.season_info.number, info.season_info.raw = season, sr
|
||||
info.ep_info.number = episode
|
||||
info.subtitle, info.dpi, info.source = sub, dpi, source
|
||||
info.title_info.group = ng
|
||||
info.group = group
|
||||
return info
|
||||
return Episode(name_en, name_zh, name_jp, season, sr, episode, sub, group, dpi, source)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test = RawParser()
|
||||
test_txt = "[SWSUB][7月新番][继母的拖油瓶是我的前女友/継母の连れ子が元カノだった][001][GB_JP][AVC][1080P][网盘][无修正] [331.6MB] [复制磁连]"
|
||||
ep = test.analyse(test_txt)
|
||||
print(ep.title)
|
||||
en, zh, jp = test.name_process("继母的拖油瓶是我的前女友/継母の连れ子が元カノだった")
|
||||
print(f"en:{en}, zh:{zh}, jp:{jp}")
|
||||
|
||||
@@ -100,7 +100,7 @@ class DownloadParser:
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
name = "[sub][Isekai Meikyuu de Harem wo][01][BIG5][1080P][AT-X].mp4"
|
||||
name = " [MCE][Kidou Senshi Gundam Suisei no Majo][PROLOGUE][00][GB][1080p][x264 AAC].mp4"
|
||||
rename = DownloadParser()
|
||||
new_name = rename.download_rename(name, "Made abyess", 1, ".mp4", "pn")
|
||||
print(new_name)
|
||||
|
||||
@@ -1,90 +0,0 @@
|
||||
import re
|
||||
import logging
|
||||
import requests
|
||||
from conf.conf import settings
|
||||
from utils import json_config
|
||||
|
||||
from parser.episode import Episode
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class MatchRule:
    """Regular-expression patterns used when analysing a raw release name."""

    # Bracket-like delimiters on which a raw name is split.
    # NOTE(review): the "(" / ")" alternatives appear twice; presumably one
    # pair was meant to be a different (e.g. full-width) form - confirm.
    split_rule = r"\[|\]|\【|\】|\★|\(|\)|\(|\)"

    # Group 1 captures everything before the last " -" of a title.
    last_rule = r"(.*)( \-)"

    # Text stripped from a title: runs of characters outside \x00-\xff and
    # the middle dot.
    # NOTE(review): the middle alternative has "^" after consumed characters
    # and looks unmatchable - confirm the intended pattern.
    sub_title = r"[^\x00-\xff]{1,}| \d{1,2}^.*|\·"

    # A leading "S<digits>" marker; group 2 is whatever follows it.
    match_rule = r"(S\d{1,2}(.*))"

    # Group 1 is the title, group 2 the season marker that follows it.
    season_match = r"(.*)(Season \d{1,2}|S\d{1,2}|第.*季|第.*期)"

    # A run of digits inside a season marker.
    season_number_match = r"(\d+)"
|
||||
|
||||
|
||||
# Simple is often best.
class SimpleAnalyser:
    """Rule-driven analyser that pulls title, group and season out of a raw name.

    ``self.rules`` is iterated as a sequence of rules. Each rule is read
    through two keys: ``"group_name"`` (regex patterns searched in the raw
    name) and ``"name_position"`` (index of the title among the pieces of
    the delimiter-split name).
    """

    def __init__(self) -> None:
        # Load the rules stored at ``settings.rule_path`` first so that a
        # failed download below still leaves a usable rule set.
        self.rules = json_config.load(settings.rule_path)
        try:
            self.rules = requests.get(settings.rule_url).json()
            json_config.save(settings.rule_path, self.rules)
        except Exception as e:
            # Best effort: keep the rules loaded from disk and log the failure.
            logger.exception(e)

    @staticmethod
    def _extract_title(name, position):
        """Return the cleaned-up title found at *position* of the split *name*.

        Raises:
            IndexError: the split name has no piece at *position*.
        """
        pieces = [
            piece
            for piece in re.split(MatchRule.split_rule, name)
            if piece not in ("", " ")
        ]
        title = pieces[position].strip()

        # After stripping the ``sub_title`` pattern, prefer the longest
        # "/"- or "_"-separated part, provided it encodes to more than 3 bytes.
        stripped = re.sub(MatchRule.sub_title, "", title)
        candidates = [part for part in re.split(r"\/|\_", stripped) if part != ""]
        pre_name = max(candidates, key=len, default="").strip()
        if len(pre_name.encode()) > 3:
            title = pre_name

        # Cut the title at its last " -", at most twice.
        for _ in range(2):
            match_obj = re.match(MatchRule.last_rule, title, re.I)
            if match_obj is not None:
                title = match_obj.group(1).strip()

        # When the title starts with an "S<digits>" marker keep only the rest.
        match_obj = re.match(MatchRule.match_rule, title, re.I)
        if match_obj is not None:
            title = match_obj.group(2).strip()
        return title

    @staticmethod
    def _split_season(title):
        """Split a season marker off *title*.

        Returns:
            ``(title, season_raw, season_number)``. Without a season marker
            the title is returned unchanged together with ``"S01"`` and ``1``.
        """
        matched = re.match(MatchRule.season_match, title, re.I)
        if matched is None:
            return title, "S01", 1

        title = matched.group(1).strip()
        season = matched.group(2)
        numbers = re.findall(MatchRule.season_number_match, season)
        try:
            season_number = int(numbers[0])
        except (IndexError, ValueError):
            # The marker carries no usable digits; fall back to season 1.
            # (Previously a ``finally`` clause forced 1 on *every* path,
            # discarding successfully parsed season numbers.)
            logger.warning(
                f"title:{title} season:{season} can't match season in number"
            )
            season_number = 1
        return title, season, season_number

    def _match_rule(self, name):
        """Return ``(title, group)`` for the first rule pattern matching *name*, else ``None``."""
        for rule in self.rules:
            for group in rule["group_name"]:
                if not re.search(group, name):
                    continue
                try:
                    title = self._extract_title(name, rule["name_position"])
                except IndexError:
                    # Too few pieces for this rule; try the next pattern.
                    continue
                return title, group
        return None

    def analyse(self, name) -> Episode:
        """Analyse the raw *name* with the loaded rules.

        Args:
            name: raw name to analyse.

        Returns:
            An ``Episode`` with ``title``, ``group``, ``season_info.raw`` and
            ``season_info.number`` filled in, or ``None`` when no rule
            matches *name*.
        """
        matched = self._match_rule(name)
        if matched is None:
            logger.debug(f"ERROR Not match with {name}")
            return None
        title, group = matched

        title, season, season_number = self._split_season(title)

        episode = Episode()
        episode.title = title
        episode.group = group
        episode.season_info.raw = season
        episode.season_info.number = season_number
        return episode
|
||||
@@ -1,42 +0,0 @@
|
||||
from dataclasses import dataclass, field
from typing import Optional
|
||||
|
||||
|
||||
@dataclass
class Episode:
    """Parsed information about one episode, grouped into nested records.

    All values default to ``None``, so ``Episode()`` yields an empty record
    whose attributes are filled in afterwards. The attributes are declared
    as real dataclass fields so that the generated ``__eq__`` and
    ``__repr__`` reflect the stored values.
    """

    @dataclass
    class TitleInfo:
        """Title-related values."""

        raw: Optional[str] = None
        name: Optional[str] = None
        official: Optional[str] = None
        group: Optional[list] = None

    @dataclass
    class SeasonInfo:
        """Season in raw and numeric form."""

        raw: Optional[str] = None
        number: Optional[int] = None

    @dataclass
    class EpisodeInfo:
        """Episode in raw and numeric form."""

        raw: Optional[str] = None
        number: Optional[int] = None

    group: Optional[str] = None
    # default_factory gives every Episode its own nested records instead of
    # sharing one mutable instance between all of them.
    title_info: TitleInfo = field(default_factory=TitleInfo)
    season_info: SeasonInfo = field(default_factory=SeasonInfo)
    ep_info: EpisodeInfo = field(default_factory=EpisodeInfo)
    dpi: Optional[str] = None
    subtitle: Optional[str] = None
    source: Optional[str] = None

    @property
    def title(self) -> Optional[str]:
        """Shortcut for ``title_info.name``."""
        return self.title_info.name

    @title.setter
    def title(self, title: str):
        self.title_info.name = title
|
||||
@@ -18,7 +18,7 @@ class TitleParser:
|
||||
def download_parser(self, download_raw, folder_name, season, suffix, method=settings.method):
|
||||
return self._download_parser.download_rename(download_raw, folder_name, season, suffix, method)
|
||||
|
||||
def tmdb_parser(self, title: str, season:int):
|
||||
def tmdb_parser(self, title: str, season: int):
|
||||
try:
|
||||
tmdb_info = self._tmdb_parser.tmdb_search(title)
|
||||
logger.debug(f"TMDB Matched, title is {tmdb_info.title_zh}")
|
||||
@@ -35,24 +35,25 @@ class TitleParser:
|
||||
def return_dict(self, raw: str):
|
||||
try:
|
||||
episode = self.raw_parser(raw)
|
||||
title_search = episode.title_zh if episode.title_zh != "" else episode.title_en
|
||||
if settings.enable_tmdb:
|
||||
official_title, season = self.tmdb_parser(episode.title, episode.season_info.number)
|
||||
official_title, season = self.tmdb_parser(title_search, episode.season)
|
||||
else:
|
||||
official_title = episode.title
|
||||
season = episode.season_info.number
|
||||
official_title = title_search
|
||||
season = episode.season
|
||||
data = {
|
||||
"official_title": official_title,
|
||||
"title_raw": episode.title,
|
||||
"season": season,
|
||||
"season_raw": episode.season_info.raw,
|
||||
"title_raw": episode.title_en,
|
||||
"season": season if season is not None else episode.season,
|
||||
"season_raw": episode.season_raw,
|
||||
"group": episode.group,
|
||||
"dpi": episode.dpi,
|
||||
"dpi": episode.resolution,
|
||||
"source": episode.source,
|
||||
"subtitle": episode.subtitle,
|
||||
"subtitle": episode.sub,
|
||||
"added": False,
|
||||
"eps_collect": True if settings.eps_complete and episode.ep_info.number > 1 else False,
|
||||
}
|
||||
logger.debug(f"RAW:{raw} >> {episode.title}")
|
||||
logger.debug(f"RAW:{raw} >> {episode.title_en}")
|
||||
return data
|
||||
except Exception as e:
|
||||
logger.debug(e)
|
||||
@@ -63,7 +64,7 @@ if __name__ == '__main__':
|
||||
from conf.const_dev import DEV_SETTINGS
|
||||
settings.init(DEV_SETTINGS)
|
||||
T = TitleParser()
|
||||
raw = "[SWSUB][7月新番][继母的拖油瓶是我的前女友/継母の连れ子が元カノだった][001][GB_JP][AVC][1080P][网盘][无修正] [331.6MB] [复制磁连]"
|
||||
raw = "[Lilith-Raws] 在地下城寻求邂逅是否搞错了什么 / Danmachi S04 - 00 [Baha][WEB-DL][1080p][AVC AAC][CHT][MP4]"
|
||||
season = int(re.search(r"\d{1,2}", "S02").group())
|
||||
dict = T.return_dict(raw)
|
||||
print(dict)
|
||||
|
||||
Reference in New Issue
Block a user