From a5c31a469de728bccd284f4f45456ecceb062fba Mon Sep 17 00:00:00 2001 From: EstrellaXD Date: Sat, 16 Jul 2022 16:17:43 +0800 Subject: [PATCH] =?UTF-8?q?2.5.11=20-=20=E9=87=8D=E6=9E=84=E7=9B=AE?= =?UTF-8?q?=E5=BD=95=20-=20=E5=AE=8C=E6=88=90=20repath=20=E6=A8=A1?= =?UTF-8?q?=E5=9D=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/docker.yml | 2 +- src/conf/const.py | 2 +- src/core/repath.py | 30 ++++----- src/core/rss_analyser.py | 3 +- src/parser/analyser/raw_parser.py | 66 +++++++++++-------- src/parser/analyser/rename_parser.py | 2 +- src/parser/analyser/simple_analyser.py | 90 -------------------------- src/parser/episode.py | 42 ------------ src/parser/title_parser.py | 23 +++---- src/tests/test_raw_parser.py | 30 ++++----- 10 files changed, 86 insertions(+), 204 deletions(-) delete mode 100644 src/parser/analyser/simple_analyser.py delete mode 100644 src/parser/episode.py diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 45dc1eef..fdbee16a 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -3,7 +3,7 @@ name: Build(Docker) on: release: types: - - published + - released workflow_dispatch: jobs: diff --git a/src/conf/const.py b/src/conf/const.py index 3cdecd20..80d5cb00 100644 --- a/src/conf/const.py +++ b/src/conf/const.py @@ -13,8 +13,8 @@ DEFAULT_SETTINGS = { "enable_group_tag": False, "info_path": "/config/bangumi.json", "not_contain": r"720|\d+-\d+", - "rule_name_re": r"\:|\/|\.", "connect_retry_interval": 5, + "rule_name_re": r"\:|\/|\.", "debug_mode": False, "remove_bad_torrent": False, "dev_debug": False, diff --git a/src/core/repath.py b/src/core/repath.py index b7afda32..d11bbd24 100644 --- a/src/core/repath.py +++ b/src/core/repath.py @@ -1,11 +1,14 @@ -import pathlib +import logging import re from dataclasses import dataclass from pathlib import PurePath, PureWindowsPath + from core import DownloadClient from conf import settings from utils import json_config +logger = logging.getLogger(__name__) + @dataclass class RuleInfo: @@ -27,11 +30,6 @@ class RePath: self._client = download_client self.re_season = re.compile(r"S\d{1,2}") - @staticmethod - def get_data() -> list: - data = json_config.load(settings.info_path) - return data.get("bangumi_info") - @staticmethod def analyse_path(path: str): path_parts = PurePath(path).parts @@ -51,16 +49,17 @@ class RePath: all_rule.append(RuleInfo(rule, must_contain, season, folder_name, new_path)) return all_rule - def get_difference(self, bangumi_data: list, rules: list): + @staticmethod + def get_difference(bangumi_data: list, rules: [RuleInfo]) -> [RuleInfo]: different_data = [] - for rule in rules: - for item in bangumi_data: - if item["official_title"] == self.re_season.sub("", rule.rule_name).strip(): - if item["season"] != rule.season: - item["season"] = rule.season - item["official_title"] = self.re_season.sub("", rule.rule_name).strip() - different_data.append(item) - break + for data in bangumi_data: + for rule in rules: + rule_name = re.sub(r"S\d", "", rule.rule_name).strip() + if data.get("official_title") == rule_name: + if data.get("season") != rule.season: + different_data.append(rule) + data["season"] = rule.season + break return different_data def get_matched_torrents_list(self, repath_rules: [RuleInfo]) -> [RePathInfo]: @@ -83,6 +82,7 @@ class RePath: def run(self): rules = self.get_rule() match_list = self.get_matched_torrents_list(rules) + logging.info(f"Starting repath process.") for list in match_list: self.re_path(list) diff --git a/src/core/rss_analyser.py b/src/core/rss_analyser.py index 2720564a..970ecfc8 100644 --- a/src/core/rss_analyser.py +++ b/src/core/rss_analyser.py @@ -59,4 +59,5 @@ if __name__ == "__main__": data = [] ra.run(data, client) for d in data: - print(d) \ No newline at end of file + print(d.get("official_title")) + print(d.get("season")) \ No newline at end of file diff --git a/src/parser/analyser/raw_parser.py b/src/parser/analyser/raw_parser.py index 9716923a..7d4c1ba5 100644 --- a/src/parser/analyser/raw_parser.py +++ b/src/parser/analyser/raw_parser.py @@ -1,6 +1,8 @@ import logging import re -from parser.episode import Episode +from dataclasses import dataclass + +# from parser.episode import Episode logger = logging.getLogger(__name__) @@ -26,6 +28,20 @@ CHINESE_NUMBER_MAP = { } +@dataclass +class Episode: + title_en: str + title_zh: str + title_jp: str + season: int + season_raw: str + episode: int + sub: str + group: str + resolution: str + source: str + + class RawParser: @staticmethod def get_group(name: str) -> str: @@ -63,6 +79,7 @@ class RawParser: @staticmethod def name_process(name: str): + name_en, name_zh, name_jp = "", "", "" name = name.strip() split = re.split("/|\s{2}|-\s{2}", name.replace("(仅限港澳台地区)", "")) while "" in split: @@ -73,17 +90,19 @@ class RawParser: elif re.search(" - {1}", name) is not None: split = re.split("-", name) if len(split) == 1: - match_obj = re.match( - r"([^\x00-\xff]{1,})(\s)([\x00-\xff]{4,})", name) - if match_obj is not None: - return match_obj.group(3), split - compare, compare_idx = 0, 0 - for idx, name in list(enumerate(split)): - l = re.findall("[aA-zZ]{1}", name).__len__() - if l > compare: - compare = l - compare_idx = idx - return split[compare_idx].strip(), split + split_space = name.split(" ") + for idx, item in enumerate(split_space): + if re.search(r"^[\u4e00-\u9fa5]{2,}", item) is not None: + split = [item.strip(), " ".join(split_space[idx+1:]).strip()] + break + for item in split: + if re.search(r"[\u0800-\u4e00]{2,}", item): + name_jp = item + elif re.search(r"[\u4e00-\u9fa5]{2,}", item): + name_zh = item + elif re.search(r"[a-zA-Z]{3,}", item): + name_en = item + return name_en, name_zh, name_jp @staticmethod def find_tags(other): @@ -115,9 +134,9 @@ class RawParser: lambda x: x.strip(), match_obj.groups() )) raw_name, season_raw, season = self.season_process(season_info) # 处理 第n季 - name, name_group = "", "" + name_en, name_zh, name_jp = "", "", "" try: - name, name_group = self.name_process(raw_name) # 处理 名字 + name_en, name_zh, name_jp = self.name_process(raw_name) # 处理 名字 except ValueError: pass # 处理 集数 @@ -126,30 +145,23 @@ class RawParser: if raw_episode is not None: episode = int(raw_episode.group()) sub, dpi, source = self.find_tags(other) # 剩余信息处理 - return name, season, season_raw, episode, sub, dpi, source, name_group, group + return name_en, name_zh, name_jp, season, season_raw, episode, sub, dpi, source, group def analyse(self, raw: str) -> Episode or None: try: ret = self.process(raw) if ret is None: return None - name, season, sr, episode, \ - sub, dpi, source, ng, group = ret + name_en, name_zh, name_jp, season, sr, episode, \ + sub, dpi, source, group = ret except Exception as e: logger.error(f"ERROR match {raw} {e}") return None - info = Episode() - info.title = name - info.season_info.number, info.season_info.raw = season, sr - info.ep_info.number = episode - info.subtitle, info.dpi, info.source = sub, dpi, source - info.title_info.group = ng - info.group = group - return info + return Episode(name_en, name_zh, name_jp, season, sr, episode, sub, group, dpi, source) if __name__ == "__main__": test = RawParser() test_txt = "[SWSUB][7月新番][继母的拖油瓶是我的前女友/継母の连れ子が元カノだった][001][GB_JP][AVC][1080P][网盘][无修正] [331.6MB] [复制磁连]" - ep = test.analyse(test_txt) - print(ep.title) + en, zh, jp = test.name_process("继母的拖油瓶是我的前女友/継母の连れ子が元カノだった") + print(f"en:{en}, zh:{zh}, jp:{jp}") diff --git a/src/parser/analyser/rename_parser.py b/src/parser/analyser/rename_parser.py index 9d461814..3ff1715b 100644 --- a/src/parser/analyser/rename_parser.py +++ b/src/parser/analyser/rename_parser.py @@ -100,7 +100,7 @@ class DownloadParser: if __name__ == "__main__": - name = "[sub][Isekai Meikyuu de Harem wo][01][BIG5][1080P][AT-X].mp4" + name = " [MCE][Kidou Senshi Gundam Suisei no Majo][PROLOGUE][00][GB][1080p][x264 AAC].mp4" rename = DownloadParser() new_name = rename.download_rename(name, "Made abyess", 1, ".mp4", "pn") print(new_name) diff --git a/src/parser/analyser/simple_analyser.py b/src/parser/analyser/simple_analyser.py deleted file mode 100644 index 8b1a7393..00000000 --- a/src/parser/analyser/simple_analyser.py +++ /dev/null @@ -1,90 +0,0 @@ -import re -import logging -import requests -from conf.conf import settings -from utils import json_config - -from parser.episode import Episode - -logger = logging.getLogger(__name__) - - -class MatchRule: - split_rule = r"\[|\]|\【|\】|\★|\(|\)|\(|\)" - last_rule = r"(.*)( \-)" - sub_title = r"[^\x00-\xff]{1,}| \d{1,2}^.*|\·" - match_rule = r"(S\d{1,2}(.*))" - season_match = r"(.*)(Season \d{1,2}|S\d{1,2}|第.*季|第.*期)" - season_number_match = r"(\d+)" - - -# 简单往往是最好的 -class SimpleAnalyser: - def __init__(self) -> None: - self.rules = json_config.load(settings.rule_path) - try: - self.rules = requests.get(settings.rule_url).json() - json_config.save(settings.rule_path, self.rules) - except Exception as e: - logger.exception(e) - - def analyse(self, name) -> Episode: - flag = False - for rule in self.rules: - for group in rule["group_name"]: - if re.search(group, name): - n = re.split(MatchRule.split_rule, name) - while "" in n: - n.remove("") - while " " in n: - n.remove(" ") - try: - title = n[rule["name_position"]].strip() - except IndexError: - continue - sub_title = re.sub(MatchRule.sub_title, "", title) - b = re.split(r"\/|\_", sub_title) - while "" in b: - b.remove("") - pre_name = max(b, key=len, default="").strip() - if len(pre_name.encode()) > 3: - title = pre_name - for i in range(2): - match_obj = re.match(MatchRule.last_rule, title, re.I) - if match_obj is not None: - title = match_obj.group(1).strip() - match_obj = re.match(MatchRule.match_rule, title, re.I) - if match_obj is not None: - title = match_obj.group(2).strip() - # debug - # print(bangumi_title) - # print(group) - flag = True - break - if flag: - break - if not flag: - logger.debug(f"ERROR Not match with {name}") - return - match_title_season = re.match(MatchRule.season_match, title, re.I) - if match_title_season is not None: - title = match_title_season.group(1).strip() - season = match_title_season.group(2) - match_season_number = re.findall(MatchRule.season_number_match, season) - try: - season_number = int(match_season_number[0]) - except: - logger.warning( - f"title:{title} season:{season} can't match season in number" - ) - finally: - season_number = 1 - else: - season = "S01" - season_number = 1 - episode = Episode() - episode.title = title - episode.group = group - episode.season_info.raw = season - episode.season_info.number = season_number - return episode diff --git a/src/parser/episode.py b/src/parser/episode.py deleted file mode 100644 index 37eaa579..00000000 --- a/src/parser/episode.py +++ /dev/null @@ -1,42 +0,0 @@ -from dataclasses import dataclass - - -@dataclass -class Episode: - @dataclass - class TitleInfo: - def __init__(self) -> None: - self.raw: str = None - self.name: str = None - self.official: str = None - self.group: list = None - - @dataclass - class SeasonInfo: - def __init__(self) -> None: - self.raw: str = None - self.number: int = None - - @dataclass - class EpisodeInfo: - def __init__(self) -> None: - self.raw: str = None - self.number: int = None - - - @property - def title(self) -> str: - return self.title_info.name - - @title.setter - def title(self, title: str): - self.title_info.name = title - - def __init__(self) -> None: - self.group: str = None - self.title_info = Episode.TitleInfo() - self.season_info = Episode.SeasonInfo() - self.ep_info = Episode.EpisodeInfo() - self.dpi: str = None - self.subtitle: str = None - self.source: str = None diff --git a/src/parser/title_parser.py b/src/parser/title_parser.py index bf851ab2..dbbc50af 100644 --- a/src/parser/title_parser.py +++ b/src/parser/title_parser.py @@ -18,7 +18,7 @@ class TitleParser: def download_parser(self, download_raw, folder_name, season, suffix, method=settings.method): return self._download_parser.download_rename(download_raw, folder_name, season, suffix, method) - def tmdb_parser(self, title: str, season:int): + def tmdb_parser(self, title: str, season: int): try: tmdb_info = self._tmdb_parser.tmdb_search(title) logger.debug(f"TMDB Matched, title is {tmdb_info.title_zh}") @@ -35,24 +35,25 @@ class TitleParser: def return_dict(self, raw: str): try: episode = self.raw_parser(raw) + title_search = episode.title_zh if episode.title_zh != "" else episode.title_en if settings.enable_tmdb: - official_title, season = self.tmdb_parser(episode.title, episode.season_info.number) + official_title, season = self.tmdb_parser(title_search, episode.season) else: - official_title = episode.title - season = episode.season_info.number + official_title = title_search + season = episode.season data = { "official_title": official_title, - "title_raw": episode.title, - "season": season, - "season_raw": episode.season_info.raw, + "title_raw": episode.title_en, + "season": season if season is not None else episode.season, + "season_raw": episode.season_raw, "group": episode.group, - "dpi": episode.dpi, + "dpi": episode.resolution, "source": episode.source, - "subtitle": episode.subtitle, + "subtitle": episode.sub, "added": False, "eps_collect": True if settings.eps_complete and episode.ep_info.number > 1 else False, } - logger.debug(f"RAW:{raw} >> {episode.title}") + logger.debug(f"RAW:{raw} >> {episode.title_en}") return data except Exception as e: logger.debug(e) @@ -63,7 +64,7 @@ if __name__ == '__main__': from conf.const_dev import DEV_SETTINGS settings.init(DEV_SETTINGS) T = TitleParser() - raw = "[SWSUB][7月新番][继母的拖油瓶是我的前女友/継母の连れ子が元カノだった][001][GB_JP][AVC][1080P][网盘][无修正] [331.6MB] [复制磁连]" + raw = "[Lilith-Raws] 在地下城寻求邂逅是否搞错了什么 / Danmachi S04 - 00 [Baha][WEB-DL][1080p][AVC AAC][CHT][MP4]" season = int(re.search(r"\d{1,2}", "S02").group()) dict = T.return_dict(raw) print(dict) diff --git a/src/tests/test_raw_parser.py b/src/tests/test_raw_parser.py index f134e862..46422d19 100644 --- a/src/tests/test_raw_parser.py +++ b/src/tests/test_raw_parser.py @@ -11,27 +11,27 @@ class TestRawParser(unittest.TestCase): content = "【幻樱字幕组】【4月新番】【古见同学有交流障碍症 第二季 Komi-san wa, Komyushou Desu. S02】【22】【GB_MP4】【1920X1080】" info = parser.analyse(content) - self.assertEqual(info.title, "Komi-san wa, Komyushou Desu.") - self.assertEqual(info.dpi, "1920X1080") - self.assertEqual(info.ep_info.number, 22) - self.assertEqual(info.season_info.number, 2) + self.assertEqual(info.title_en, "Komi-san wa, Komyushou Desu.") + self.assertEqual(info.resolution, "1920X1080") + self.assertEqual(info.episode, 22) + self.assertEqual(info.season, 2) content = "[百冬练习组&LoliHouse] BanG Dream! 少女乐团派对!☆PICO FEVER! / Garupa Pico: Fever! - 26 [WebRip 1080p HEVC-10bit AAC][简繁内封字幕][END] [101.69 MB]" info = parser.analyse(content) self.assertEqual(info.group, "百冬练习组&LoliHouse") - self.assertEqual(info.title, "BanG Dream! 少女乐团派对!☆PICO FEVER!") - self.assertEqual(info.dpi, "1080p") - self.assertEqual(info.ep_info.number, 26) - self.assertEqual(info.season_info.number, 1) + self.assertEqual(info.title_zh, "BanG Dream! 少女乐团派对!☆PICO FEVER!") + self.assertEqual(info.resolution, "1080p") + self.assertEqual(info.episode, 26) + self.assertEqual(info.season, 1) content = "【喵萌奶茶屋】★04月新番★[夏日重现/Summer Time Rendering][11][1080p][繁日双语][招募翻译] [539.4 MB]" info = parser.analyse(content) self.assertEqual(info.group, "喵萌奶茶屋") - self.assertEqual(info.title, "Summer Time Rendering") - self.assertEqual(info.dpi, "1080p") - self.assertEqual(info.ep_info.number, 11) - self.assertEqual(info.season_info.number, 1) + self.assertEqual(info.title_en, "Summer Time Rendering") + self.assertEqual(info.resolution, "1080p") + self.assertEqual(info.episode, 11) + self.assertEqual(info.season, 1) content = "【喵萌奶茶屋】★04月新番★夏日重现/Summer Time Rendering[11][1080p][繁日双语][招募翻译] [539.4 MB]" info = parser.analyse(content) @@ -71,12 +71,12 @@ class TestRawParser(unittest.TestCase): for epi in range(1, 100000, 100): content = f"【幻樱字幕组】【4月新番】【古见同学有交流障碍症 第一季 Komi-san wa, Komyushou Desu. S01】【{epi}】【GB_MP4】【4K】" info = parser.analyse(content) - self.assertEqual(info.ep_info.number, epi) + self.assertEqual(info.episode, epi) for epi in range(1, 100000, 100): content = f"[Nekomoe kissaten][Summer Time Rendering - {epi} [1080p][JPTC].mp4" info = parser.analyse(content) - self.assertEqual(info.ep_info.number, epi) + self.assertEqual(info.episode, epi) def test_season(self): chinese_number_arr = ["一", "二", "三", "四", @@ -86,4 +86,4 @@ class TestRawParser(unittest.TestCase): season = str(i).zfill(2) content = f"【幻樱字幕组】【古见同学有交流障碍症 第{chinese_number_arr[i - 1]}季 Komi-san wa, Komyushou Desu. S{season}】[1]" info = parser.analyse(content) - self.assertEqual(info.season_info.number, i) + self.assertEqual(info.season, i)