From 87224cd8ad4cac52f91ed5b47da32cbd00852b12 Mon Sep 17 00:00:00 2001 From: EstrellaXD Date: Mon, 10 Jul 2023 22:36:55 +0800 Subject: [PATCH] fix: parser error #385 #375 --- .../src/module/parser/analyser/raw_parser.py | 22 ++++++++++++++----- backend/src/test/test_raw_parser.py | 10 +++++++++ 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/backend/src/module/parser/analyser/raw_parser.py b/backend/src/module/parser/analyser/raw_parser.py index 37458d85..1f3c3469 100644 --- a/backend/src/module/parser/analyser/raw_parser.py +++ b/backend/src/module/parser/analyser/raw_parser.py @@ -94,11 +94,23 @@ def name_process(name: str): split = re.split("-", name) if len(split) == 1: split_space = split[0].split(" ") - for idx, item in enumerate(split_space): + language_pattern = [] + for item in split_space: if re.search(r"^[\u4e00-\u9fa5]{2,}", item) is not None: - split_space.remove(item) - split = [item.strip(), " ".join(split_space).strip()] - break + language_pattern.append(1) + elif re.search(r"[a-zA-Z]{2,}", item) is not None: + language_pattern.append(0) + elif re.search(r"[\u0800-\u4e00]{2,}", item) is not None: + language_pattern.append(2) + split = [split_space[0]] + for i in range(1, len(split_space)): + # 如果当前字符串的语言与上一个字符串的语言相同 + if language_pattern[i] == language_pattern[i - 1]: + # 合并这两个字符串 + split[-1] += ' ' + split_space[i] + else: + # 否则,将当前字符串添加到结果列表中 + split.append(split_space[i]) for item in split: if re.search(r"[\u0800-\u4e00]{2,}", item) and not name_jp: name_jp = item.strip() @@ -184,5 +196,5 @@ def raw_parser(raw: str) -> Episode | None: if __name__ == "__main__": - title = "【极影字幕·毁片党】LoveLive! SunShine!! 幻日的夜羽 -SUNSHINE in the MIRROR- 第01集 TV版 HEVC_opus 1080p " + title = "【极影字幕·毁片党】LoveLive! SunShine!! 幻日的夜羽 -SUNSHINE in the MIRROR- 第01集 TV版 HEVC_opus 1080p" print(raw_parser(title)) diff --git a/backend/src/test/test_raw_parser.py b/backend/src/test/test_raw_parser.py index 60297c8e..39f1944c 100644 --- a/backend/src/test/test_raw_parser.py +++ b/backend/src/test/test_raw_parser.py @@ -78,3 +78,13 @@ def test_raw_parser(): assert info.resolution == "1080p" assert info.episode == 1 assert info.season == 1 + + content = "[ANi] BLEACH 死神 千年血战篇-诀别谭- - 14 [1080P][Baha][WEB-DL][AAC AVC][CHT][MP4]" + info = raw_parser(content) + assert info.group == "ANi" + assert info.title_en == "BLEACH" + assert info.title_zh == "死神 千年血战篇-诀别谭-" + assert info.resolution == "1080P" + assert info.episode == 14 + assert info.season == 1 +