- 重构目录
- 完成 repath 模块
This commit is contained in:
EstrellaXD
2022-07-16 16:17:43 +08:00
parent fb2ac72974
commit a5c31a469d
10 changed files with 86 additions and 204 deletions

View File

@@ -1,6 +1,8 @@
import logging
import re
from parser.episode import Episode
from dataclasses import dataclass
# from parser.episode import Episode
logger = logging.getLogger(__name__)
@@ -26,6 +28,20 @@ CHINESE_NUMBER_MAP = {
}
@dataclass
class Episode:
    """Flat record of the fields parsed out of one raw release title.

    ``RawParser.analyse`` constructs instances positionally, so the order of
    the fields below is part of the contract and must not be rearranged.
    """

    # Titles as returned by RawParser.name_process; each one is an empty
    # string when no matching segment was found.
    title_en: str
    title_zh: str
    title_jp: str
    # Season number and the raw season value, both taken from
    # RawParser.season_process.
    season: int
    season_raw: str
    # Episode number, converted with int() from the matched episode text.
    episode: int
    # Subtitle tag produced by RawParser.find_tags.
    sub: str
    # Release group extracted from the raw title.
    group: str
    # Resolution tag produced by RawParser.find_tags (named ``dpi`` there).
    resolution: str
    # Source tag produced by RawParser.find_tags.
    source: str
class RawParser:
@staticmethod
def get_group(name: str) -> str:
@@ -63,6 +79,7 @@ class RawParser:
@staticmethod
def name_process(name: str):
name_en, name_zh, name_jp = "", "", ""
name = name.strip()
split = re.split("/|\s{2}|-\s{2}", name.replace("(仅限港澳台地区)", ""))
while "" in split:
@@ -73,17 +90,19 @@ class RawParser:
elif re.search(" - {1}", name) is not None:
split = re.split("-", name)
if len(split) == 1:
match_obj = re.match(
r"([^\x00-\xff]{1,})(\s)([\x00-\xff]{4,})", name)
if match_obj is not None:
return match_obj.group(3), split
compare, compare_idx = 0, 0
for idx, name in list(enumerate(split)):
l = re.findall("[aA-zZ]{1}", name).__len__()
if l > compare:
compare = l
compare_idx = idx
return split[compare_idx].strip(), split
split_space = name.split(" ")
for idx, item in enumerate(split_space):
if re.search(r"^[\u4e00-\u9fa5]{2,}", item) is not None:
split = [item.strip(), " ".join(split_space[idx+1:]).strip()]
break
for item in split:
if re.search(r"[\u0800-\u4e00]{2,}", item):
name_jp = item
elif re.search(r"[\u4e00-\u9fa5]{2,}", item):
name_zh = item
elif re.search(r"[a-zA-Z]{3,}", item):
name_en = item
return name_en, name_zh, name_jp
@staticmethod
def find_tags(other):
@@ -115,9 +134,9 @@ class RawParser:
lambda x: x.strip(), match_obj.groups()
))
raw_name, season_raw, season = self.season_process(season_info) # 处理 第n季
name, name_group = "", ""
name_en, name_zh, name_jp = "", "", ""
try:
name, name_group = self.name_process(raw_name) # 处理 名字
name_en, name_zh, name_jp = self.name_process(raw_name) # 处理 名字
except ValueError:
pass
# 处理 集数
@@ -126,30 +145,23 @@ class RawParser:
if raw_episode is not None:
episode = int(raw_episode.group())
sub, dpi, source = self.find_tags(other) # 剩余信息处理
return name, season, season_raw, episode, sub, dpi, source, name_group, group
return name_en, name_zh, name_jp, season, season_raw, episode, sub, dpi, source, group
def analyse(self, raw: str) -> Episode or None:
try:
ret = self.process(raw)
if ret is None:
return None
name, season, sr, episode, \
sub, dpi, source, ng, group = ret
name_en, name_zh, name_jp, season, sr, episode, \
sub, dpi, source, group = ret
except Exception as e:
logger.error(f"ERROR match {raw} {e}")
return None
info = Episode()
info.title = name
info.season_info.number, info.season_info.raw = season, sr
info.ep_info.number = episode
info.subtitle, info.dpi, info.source = sub, dpi, source
info.title_info.group = ng
info.group = group
return info
return Episode(name_en, name_zh, name_jp, season, sr, episode, sub, group, dpi, source)
if __name__ == "__main__":
test = RawParser()
test_txt = "[SWSUB][7月新番][继母的拖油瓶是我的前女友/継母の连れ子が元カノだった][001][GB_JP][AVC][1080P][网盘][无修正] [331.6MB] [复制磁连]"
ep = test.analyse(test_txt)
print(ep.title)
en, zh, jp = test.name_process("继母的拖油瓶是我的前女友/継母の连れ子が元カノだった")
print(f"en:{en}, zh:{zh}, jp:{jp}")

View File

@@ -100,7 +100,7 @@ class DownloadParser:
if __name__ == "__main__":
name = "[sub][Isekai Meikyuu de Harem wo][01][BIG5][1080P][AT-X].mp4"
name = " [MCE][Kidou Senshi Gundam Suisei no Majo][PROLOGUE][00][GB][1080p][x264 AAC].mp4"
rename = DownloadParser()
new_name = rename.download_rename(name, "Made abyess", 1, ".mp4", "pn")
print(new_name)

View File

@@ -1,90 +0,0 @@
import re
import logging
import requests
from conf.conf import settings
from utils import json_config
from parser.episode import Episode
logger = logging.getLogger(__name__)
class MatchRule:
    """Regular expressions consumed by ``SimpleAnalyser.analyse``."""

    # Delimiters the raw name is split on: "[", "]", "【", "】", "★", ")" and,
    # as written, the literal three-character sequence "||(".
    # NOTE(review): "\|\|\(" looks like an alternation that lost characters
    # (possibly full-width parentheses) - confirm against the upstream file.
    split_rule = r"\[|\]|\【|\】|\★|\|\|\(|\)"
    # Group 1 is everything before the last " -" in the title (greedy match).
    last_rule = r"(.*)( \-)"
    # Text removed from the title segment: runs of characters outside
    # \x00-\xff, and the "·" separator.
    # NOTE(review): the middle alternative puts "^" after consumed characters
    # and is applied without re.MULTILINE, so it appears unable to match.
    sub_title = r"[^\x00-\xff]{1,}| \d{1,2}^.*|\·"
    # Applied with re.match: group 2 is whatever follows a leading
    # "S" plus one or two digits.
    match_rule = r"(S\d{1,2}(.*))"
    # Group 1 is the title in front of the season marker, group 2 the marker
    # itself ("Season N", "SN", "第…季" or "第…期").
    season_match = r"(.*)(Season \d{1,2}|S\d{1,2}|第.*季|第.*期)"
    # Digit runs inside the season marker; the first hit is the season number.
    season_number_match = r"(\d+)"
# Simple is often best.
class SimpleAnalyser:
    """Extract title, group and season from a raw name using per-group rules.

    ``self.rules`` is iterated as a sequence of mappings; ``analyse`` reads
    ``rule["group_name"]`` (an iterable of regex patterns) and
    ``rule["name_position"]`` (an index into the bracket-split name) from
    each of them.
    """

    def __init__(self) -> None:
        """Load the stored rules, then try to refresh them from the rule URL."""
        # Load the stored copy first so rules are available even when the
        # download below fails.
        self.rules = json_config.load(settings.rule_path)
        try:
            self.rules = requests.get(settings.rule_url).json()
            json_config.save(settings.rule_path, self.rules)
        except Exception as e:
            # Deliberate best effort: keep the stored rules and log the reason
            # the refresh did not succeed.
            logger.exception(e)

    @staticmethod
    def _clean_title(title: str) -> str:
        """Reduce one bracket segment of the raw name to the bare title."""
        sub_title = re.sub(MatchRule.sub_title, "", title)
        candidates = [part for part in re.split(r"\/|\_", sub_title) if part != ""]
        pre_name = max(candidates, key=len, default="").strip()
        # Only prefer the stripped candidate when it is longer than 3 bytes;
        # otherwise keep the segment as it was.
        if len(pre_name.encode()) > 3:
            title = pre_name
        # Cut a trailing " -..." suffix, at most twice.
        for _ in range(2):
            match_obj = re.match(MatchRule.last_rule, title, re.I)
            if match_obj is not None:
                title = match_obj.group(1).strip()
        # Drop a leading "S<digits>" marker and keep what follows it.
        match_obj = re.match(MatchRule.match_rule, title, re.I)
        if match_obj is not None:
            title = match_obj.group(2).strip()
        return title

    @staticmethod
    def _split_season(title: str):
        """Return ``(title, season_raw, season_number)`` for a cleaned title."""
        match_title_season = re.match(MatchRule.season_match, title, re.I)
        if match_title_season is None:
            return title, "S01", 1
        title = match_title_season.group(1).strip()
        season = match_title_season.group(2)
        match_season_number = re.findall(MatchRule.season_number_match, season)
        try:
            season_number = int(match_season_number[0])
        except (IndexError, ValueError):
            # The marker carries no usable digits (e.g. a Chinese numeral):
            # fall back to season 1. This fallback used to sit in a
            # ``finally`` clause, which also overwrote every successfully
            # parsed season number with 1.
            logger.warning(
                f"title:{title} season:{season} can't match season in number"
            )
            season_number = 1
        return title, season, season_number

    def analyse(self, name) -> Episode:
        """Parse ``name`` with the first rule whose group pattern matches.

        Returns an ``Episode`` with ``title``, ``group``, ``season_info.raw``
        and ``season_info.number`` filled in, or ``None`` when no rule yields
        a title for ``name``.
        """
        for rule in self.rules:
            for group in rule["group_name"]:
                if not re.search(group, name):
                    continue
                segments = [
                    part
                    for part in re.split(MatchRule.split_rule, name)
                    if part not in ("", " ")
                ]
                try:
                    title = segments[rule["name_position"]].strip()
                except IndexError:
                    # This rule expects more segments than the name has;
                    # try the next group pattern.
                    continue
                title, season, season_number = self._split_season(
                    self._clean_title(title)
                )
                episode = Episode()
                episode.title = title
                episode.group = group
                episode.season_info.raw = season
                episode.season_info.number = season_number
                return episode
        logger.debug(f"ERROR Not match with {name}")
        return None

View File

@@ -1,42 +0,0 @@
from dataclasses import dataclass, field
@dataclass
class Episode:
    """Parsed information about one episode, grouped into nested records.

    Every attribute is declared as a real dataclass field, so the generated
    ``__repr__`` and ``__eq__`` reflect the stored values. ``Episode()`` with
    no arguments still yields an instance whose scalar attributes are ``None``
    and whose nested records are fresh, empty objects.
    """

    @dataclass
    class TitleInfo:
        """Title-related values; all default to ``None``."""

        raw: str = None
        name: str = None
        official: str = None
        group: list = None

    @dataclass
    class SeasonInfo:
        """Season text and number; both default to ``None``."""

        raw: str = None
        number: int = None

    @dataclass
    class EpisodeInfo:
        """Episode text and number; both default to ``None``."""

        raw: str = None
        number: int = None

    group: str = None
    # default_factory gives every Episode its own nested records instead of
    # sharing one instance across all episodes.
    title_info: TitleInfo = field(default_factory=TitleInfo)
    season_info: SeasonInfo = field(default_factory=SeasonInfo)
    ep_info: EpisodeInfo = field(default_factory=EpisodeInfo)
    dpi: str = None
    subtitle: str = None
    source: str = None

    @property
    def title(self) -> str:
        """Shortcut for ``title_info.name``."""
        return self.title_info.name

    @title.setter
    def title(self, title: str):
        self.title_info.name = title

View File

@@ -18,7 +18,7 @@ class TitleParser:
def download_parser(self, download_raw, folder_name, season, suffix, method=settings.method):
return self._download_parser.download_rename(download_raw, folder_name, season, suffix, method)
def tmdb_parser(self, title: str, season:int):
def tmdb_parser(self, title: str, season: int):
try:
tmdb_info = self._tmdb_parser.tmdb_search(title)
logger.debug(f"TMDB Matched, title is {tmdb_info.title_zh}")
@@ -35,24 +35,25 @@ class TitleParser:
def return_dict(self, raw: str):
try:
episode = self.raw_parser(raw)
title_search = episode.title_zh if episode.title_zh != "" else episode.title_en
if settings.enable_tmdb:
official_title, season = self.tmdb_parser(episode.title, episode.season_info.number)
official_title, season = self.tmdb_parser(title_search, episode.season)
else:
official_title = episode.title
season = episode.season_info.number
official_title = title_search
season = episode.season
data = {
"official_title": official_title,
"title_raw": episode.title,
"season": season,
"season_raw": episode.season_info.raw,
"title_raw": episode.title_en,
"season": season if season is not None else episode.season,
"season_raw": episode.season_raw,
"group": episode.group,
"dpi": episode.dpi,
"dpi": episode.resolution,
"source": episode.source,
"subtitle": episode.subtitle,
"subtitle": episode.sub,
"added": False,
"eps_collect": True if settings.eps_complete and episode.ep_info.number > 1 else False,
}
logger.debug(f"RAW:{raw} >> {episode.title}")
logger.debug(f"RAW:{raw} >> {episode.title_en}")
return data
except Exception as e:
logger.debug(e)
@@ -63,7 +64,7 @@ if __name__ == '__main__':
from conf.const_dev import DEV_SETTINGS
settings.init(DEV_SETTINGS)
T = TitleParser()
raw = "[SWSUB][7月新番][继母的拖油瓶是我的前女友/継母の连れ子が元カノだった][001][GB_JP][AVC][1080P][网盘][无修正] [331.6MB] [复制磁连]"
raw = "[Lilith-Raws] 在地下城寻求邂逅是否搞错了什么 / Danmachi S04 - 00 [Baha][WEB-DL][1080p][AVC AAC][CHT][MP4]"
season = int(re.search(r"\d{1,2}", "S02").group())
dict = T.return_dict(raw)
print(dict)