feat: 完善集数定位模板识别与自动推荐稳定性 (#5978)

This commit is contained in:
Album
2026-06-21 07:34:57 +08:00
committed by GitHub
parent 970f2cf1ca
commit b395d820d8
2 changed files with 524 additions and 30 deletions

View File

@@ -1,11 +1,11 @@
import re
from collections import defaultdict
from dataclasses import dataclass
from functools import lru_cache
from pathlib import Path
from typing import Dict, Iterable, List, Match, Optional, Tuple
from typing import Dict, Iterable, List, Match, Optional, Tuple, Union
import anitopy
import parse
from app.core.config import settings
from app.core.metainfo import MetaInfoPath
@@ -14,6 +14,70 @@ from app.log import logger
from app.schemas import EpisodeFormatRule, FileItem
@dataclass(frozen=True)
class _TemplateParseResult:
named: Dict[str, str]
spans: Dict[str, Tuple[int, int]]
@lru_cache(maxsize=256)
def _compile_template_pattern(
template: str,
ep_group_name: Optional[str] = None,
):
parts: List[str] = ["^"]
cursor = 0
while cursor < len(template):
if template.startswith("{{", cursor):
parts.append(re.escape("{"))
cursor += 2
continue
if template.startswith("}}", cursor):
parts.append(re.escape("}"))
cursor += 2
continue
if template[cursor] == "{":
end = template.find("}", cursor + 1)
if end < 0:
raise ValueError(f"模板存在未闭合占位符:{template}")
group_name = template[cursor + 1:end]
if not re.fullmatch(r"[A-Za-z_]\w*", group_name):
raise ValueError(f"模板占位符名称无效:{template}")
quantifier = ".+?" if group_name == ep_group_name else ".*?"
parts.append(f"(?P<{group_name}>{quantifier})")
cursor = end + 1
continue
if template[cursor] == "}":
raise ValueError(f"模板存在未转义的右花括号:{template}")
literal_end = cursor
while literal_end < len(template) and template[literal_end] not in "{}":
literal_end += 1
parts.append(re.escape(template[cursor:literal_end]))
cursor = literal_end
parts.append("$")
return re.compile("".join(parts))
def _match_template(
    template: str,
    text: str,
    ep_group_name: Optional[str] = None,
) -> Optional[_TemplateParseResult]:
    """Match ``text`` against ``template`` and report the captured placeholders.

    :param template: placeholder template handed to ``_compile_template_pattern``
    :param text: text to match
    :param ep_group_name: forwarded unchanged to ``_compile_template_pattern``
    :return: captured values and their spans, or ``None`` when ``text`` does
        not match
    """
    matched = _compile_template_pattern(template, ep_group_name).match(text)
    if not matched:
        return None

    captured = matched.groupdict()
    located: Dict[str, Tuple[int, int]] = {}
    for name in captured:
        located[name] = matched.span(name)
    return _TemplateParseResult(named=captured, spans=located)
class FormatParser(object):
_key = ""
_split_chars = r"\.|\s+|\(|\)|\[|]|-|\+|【|】|/||;|&|\||#|_|「|」|~"
@@ -41,6 +105,11 @@ class FormatParser(object):
self.__offset = f"EP+{offset}"
self._key = key
self._part = None
self._compiled_pattern = (
_compile_template_pattern(self._format, self._key)
if self._format
else None
)
if part:
self._part = part
if details:
@@ -130,10 +199,10 @@ class FormatParser(object):
"""
if not self._format:
return None, None
ret = parse.parse(self._format, file)
if not ret or not ret.__contains__(self._key):
ret = self._compiled_pattern.match(file) if self._compiled_pattern else None
if not ret or self._key not in ret.groupdict():
return None, None
episodes = ret.__getitem__(self._key)
episodes = ret.group(self._key)
if not re.compile(
r"^([Ee][Pp]?)?(\d{1,4})(-([Ee][Pp]?)?(\d{1,4}))?$",
re.IGNORECASE,
@@ -165,6 +234,7 @@ class EpisodeFormatRuleHelper:
"""
_MIN_MEDIA_FILE_SIZE_BYTES = 100 * 1024 * 1024
_MIN_AUTO_VALID_MEDIA_COVERAGE = 0.6
_EMPTY_META = MetaBase(title="")
_EP_RANGE_RE = re.compile(
@@ -177,12 +247,29 @@ class EpisodeFormatRuleHelper:
_SEASON_EP_RE = re.compile(r"[Ss]\d{1,4}[Ee][Pp]?(\d{1,4})(?!\d)")
_HASH_EP_RE = re.compile(r"(?<!\d)#(\d{1,4})(?!\d)")
_BRACKET_EP_RE = re.compile(r"[\[【](\d{1,4})[\]】]")
_FALLBACK_BRACKET_EP_RE = re.compile(r"[\[【](\d{1,3})[\]】]")
_FALLBACK_EPISODE_RE = re.compile(r"第(\d{1,4})[話话]")
_FALLBACK_EPISODE_JI_RE = re.compile(r"第(\d{1,4})集")
_FALLBACK_PERIOD_RE = re.compile(r"。(\d{1,4})\s")
_CJK_EP_RE = re.compile(r"第(\d{1,4})(?:[話话集])")
_SPECIAL_SAMPLE_RE = re.compile(
r"(?<![A-Za-z0-9])(SP\d+|NCOP|NCED|OP|ED|MENU)(?![A-Za-z0-9])",
r"\[(?:"
r"SP\d+"
r"|NC(?:OP|ED)(?:[_\s-]*EP\d+)?(?:\s+VER\.\d+)?"
r"|OP"
r"|ED"
r"|MENU(?:\d+|OVA)?"
r"|OVA(?:\s+TRAILER)?"
r"|OAD"
r"|PV\d*"
r"|CM(?:\d+| COLLECTION)?"
r"|TRAILER"
r"|WEB PREVIEW(?:\s+\d+)?"
r"|SERIES REVIEW"
r"|TABLE GAME"
r"|TV SPOTS?"
r"|\d+\((?:OVA|OAD|SP)\d+\)"
r")\]",
re.IGNORECASE,
)
@@ -317,7 +404,37 @@ class EpisodeFormatRuleHelper:
None,
)
candidate_media_count = 0
for item in candidates:
if (
self._get_file_kind(item) == "media"
and not self._is_special_sample(item.name or "")
):
candidate_media_count += 1
valid_media_count = 0
for item in valid_samples:
if item.source_kind == "media":
valid_media_count += 1
if (
candidate_media_count > 1
and valid_media_count / candidate_media_count
< self._MIN_AUTO_VALID_MEDIA_COVERAGE
):
logger.warn(
"有效正片样本覆盖率不足,放弃智能生成:"
f"valid_media={valid_media_count}, candidate_media={candidate_media_count}"
)
return False, "有效正片样本覆盖率不足,建议补充集数定位规则", None
majority_samples, clear_majority = self._select_base_samples(valid_samples)
logger.debug(
"自动推荐多数派样本:"
f"valid={len(valid_samples)}, majority={len(majority_samples)}, "
f"clear_majority={clear_majority}, files="
f"{[(sample.file_name, sample.expected_episode, sample.ep_span) for sample in majority_samples]}"
)
if len(valid_samples) > 1 and not clear_majority:
logger.warn("自动生成样本未形成明确多数派,放弃推荐")
return False, "样本命名差异过大,建议补充集数定位规则", None
@@ -328,10 +445,19 @@ class EpisodeFormatRuleHelper:
episode_format = self._build_ep_only_template(
majority_names, majority_spans, use_majority=False
)
logger.debug(
"自动推荐基础模板:"
f"sample={majority_names[0] if majority_names else None}, "
f"span={majority_spans[0] if majority_spans else None}, template={episode_format}"
)
if not self._validate_auto_template(episode_format, majority_samples):
diff_result = self._build_template_with_diff(
majority_names, majority_spans, use_majority=False
)
logger.debug(
"自动推荐差异模板尝试:"
f"base={episode_format}, diff={diff_result}"
)
if diff_result and self._validate_auto_template(
diff_result, majority_samples
):
@@ -495,8 +621,18 @@ class EpisodeFormatRuleHelper:
ep_span = self._locate_episode(file_name, normalized_episode)
if ep_span is None:
logger.debug(
"自动推荐样本跳过:未定位到集数 token - "
f"{file_name} - episode={normalized_episode}"
)
continue
logger.debug(
"自动推荐样本入选:"
f"{file_name} - episode={normalized_episode}, span={ep_span}, "
f"matched={file_name[ep_span[0]:ep_span[1]]}, "
f"kind={self._get_file_kind(item)}"
)
valid_samples.append(
_AutoRecommendSample(
file_name=file_name,
@@ -538,9 +674,9 @@ class EpisodeFormatRuleHelper:
for candidate in cls._build_episode_candidates(normalized_episode_value):
token_pattern = re.compile(
rf"(?:(?<=^)|(?<=[\s._\-\[\]【】()]))"
rf"(?:(?<=^)|(?<=[\s._\-\[\]【】()「」『』《》〈〉〔〕]))"
rf"{re.escape(candidate)}"
rf"(?:(?=$)|(?=[\s._\-\[\]【】()]))"
rf"(?:(?=$)|(?=[\s._\-\[\]【】()「」『』《》〈〉〔〕]))"
)
matches = list(token_pattern.finditer(file_name))
if matches:
@@ -629,6 +765,59 @@ class EpisodeFormatRuleHelper:
return f"{meta.begin_episode}-{meta.end_episode}"
return str(meta.begin_episode)
@classmethod
def _should_degrade_native_conflict(
    cls,
    file_name: str,
    normalized_episode: Optional[str],
    native_episode: Optional[str],
) -> bool:
    """
    Decide whether a conflict with the natively recognised episode should be
    downgraded.

    When the automatically located episode token sits later in the file name
    and the natively recognised number only occurs before it, the native
    value is usually a sequel number in the title or a directory index, so it
    should no longer veto the automatic recommendation.
    """
    if not (file_name and normalized_episode and native_episode):
        return False

    # Both values are located before either result is inspected.
    auto_span = cls._locate_episode(file_name, normalized_episode)
    native_span = cls._locate_episode(file_name, native_episode)
    if not (auto_span and native_span):
        return False

    native_end = native_span[1]
    auto_start = auto_span[0]
    return native_end <= auto_start
@classmethod
def _should_prefer_fallback_episode(
    cls,
    file_name: str,
    anitopy_episode: Optional[Union[str, List[str]]],
    fallback_episode: Optional[Union[str, List[str]]],
) -> bool:
    """
    Prefer the fallback result when anitopy picked a number near the front of
    the title while the fallback hit an explicit episode token further back.
    """
    if not (file_name and anitopy_episode and fallback_episode):
        return False

    anitopy_value = cls._normalize_episode_value(anitopy_episode)
    fallback_value = cls._normalize_episode_value(fallback_episode)
    if cls._episode_value_equals(anitopy_value, fallback_value):
        return False

    # An anitopy value that carries an end episode is kept as it is.
    _, anitopy_range_end = cls._parse_episode_value(anitopy_value)
    if anitopy_range_end is not None:
        return False

    anitopy_span = cls._locate_episode(file_name, anitopy_value)
    fallback_span = cls._locate_episode(file_name, fallback_value)
    if not (anitopy_span and fallback_span):
        return False

    anitopy_stop = anitopy_span[1]
    fallback_start = fallback_span[0]
    return anitopy_stop <= fallback_start
def _extract_episode_with_native_fallback(
self,
item: FileItem,
@@ -636,23 +825,48 @@ class EpisodeFormatRuleHelper:
file_name = item.name or ""
native_episode = self._extract_native_episode(item)
episode_number = None
anitopy_episode = None
try:
result = anitopy.parse(file_name)
episode_number = result.get("episode_number")
anitopy_episode = episode_number
except Exception as err:
logger.warn(f"anitopy 解析失败:{file_name} - {err}")
fallback_episode = self._extract_episode_fallback(file_name)
if not episode_number:
episode_number = self._extract_episode_fallback(file_name)
episode_number = fallback_episode
elif self._should_prefer_fallback_episode(
file_name,
anitopy_episode,
fallback_episode,
):
episode_number = fallback_episode
normalized_episode = (
self._normalize_episode_value(episode_number)
if episode_number
else None
)
logger.debug(
"自动推荐集数提取:"
f"{file_name} - anitopy={anitopy_episode}, "
f"fallback={fallback_episode}, normalized={normalized_episode}, "
f"native={native_episode}"
)
used_native_fallback = False
native_verified = False
if normalized_episode and native_episode:
if self._episode_value_equals(normalized_episode, native_episode):
native_verified = True
elif self._should_degrade_native_conflict(
file_name,
normalized_episode,
native_episode,
):
logger.info(
"原生集数识别疑似命中标题序号,降级冲突权重:"
f"{file_name} - auto={normalized_episode}, native={native_episode}"
)
native_episode = None
else:
return normalized_episode, native_episode, False, False
elif not normalized_episode and native_episode:
@@ -663,8 +877,22 @@ class EpisodeFormatRuleHelper:
@classmethod
def _extract_episode_fallback(cls, file_name: str) -> Optional[str]:
"""
anitopy 无法识别时的兜底集数提取第xx話 / 第xx话 / 。01 等)
anitopy 无法识别时的兜底集数提取
优先尝试结构更明确的季集/井号/方括号集数,再退回到中日韩常见文案。
"""
match = cls._SEASON_EP_RANGE_RE.search(file_name)
if match:
return match.group(1)
match = cls._SEASON_EP_RE.search(file_name)
if match:
return match.group(1)
hash_matches = list(cls._HASH_EP_RE.finditer(file_name))
if hash_matches:
return hash_matches[-1].group(1)
bracket_matches = list(cls._FALLBACK_BRACKET_EP_RE.finditer(file_name))
if bracket_matches:
return bracket_matches[-1].group(1)
match = cls._FALLBACK_EPISODE_RE.search(file_name)
if match:
return match.group(1)
@@ -926,29 +1154,33 @@ class EpisodeFormatRuleHelper:
candidates = [base_text] + compare_texts
prefix_len = self._common_prefix_length(candidates)
suffix_len = self._common_suffix_length(candidates, prefix_len)
variable_parts = [
text[
prefix_len:
len(text) - suffix_len if suffix_len else len(text)
]
for text in candidates
]
while prefix_len > 0 and any(not part for part in variable_parts):
prefix_len -= 1
variable_parts = [
end_pos = len(base_text) - suffix_len
if prefix_len >= end_pos:
base_part = base_text[prefix_len:end_pos]
compare_parts = [
text[
prefix_len:
len(text) - suffix_len if suffix_len else len(text)
]
for text in candidates
for text in compare_texts
]
if any(not part for part in variable_parts):
if not base_part and any(compare_parts):
return prefix_len, prefix_len
return None
end_pos = len(base_text) - suffix_len
if prefix_len >= end_pos:
base_part = base_text[prefix_len:end_pos]
compare_parts = [
text[
prefix_len:
len(text) - suffix_len if suffix_len else len(text)
]
for text in compare_texts
]
if any(not part for part in [base_part] + compare_parts):
if not base_part and any(compare_parts):
return prefix_len, prefix_len
if base_part and any(part == "" for part in compare_parts):
return prefix_len, end_pos
return None
return prefix_len, end_pos
@@ -989,7 +1221,7 @@ class EpisodeFormatRuleHelper:
template_parts: List[str] = []
cursor = 0
for start, end, name in sorted(spans, key=lambda item: item[0]):
if start < cursor or end <= start:
if start < cursor or end < start:
continue
template_parts.append(
self._escape_literal(base_after_ep[cursor:start])
@@ -1021,6 +1253,10 @@ class EpisodeFormatRuleHelper:
sample.file_name,
context="自动模板校验",
):
logger.debug(
"自动模板校验失败:模板未命中文件 - "
f"template={episode_format}, file={sample.file_name}"
)
return False
start_episode, end_episode, _ = self._safe_split_episode(
parser,
@@ -1032,12 +1268,22 @@ class EpisodeFormatRuleHelper:
end_episode,
sample.expected_episode,
):
logger.debug(
"自动模板校验失败:集数不匹配 - "
f"template={episode_format}, file={sample.file_name}, "
f"expected={sample.expected_episode}, actual={start_episode}-{end_episode}"
)
return False
if sample.native_episode and not self._episode_matches(
start_episode,
end_episode,
sample.native_episode,
):
logger.debug(
"自动模板校验失败:与原生集数不一致 - "
f"template={episode_format}, file={sample.file_name}, "
f"native={sample.native_episode}, actual={start_episode}-{end_episode}"
)
return False
return True
@@ -1188,9 +1434,9 @@ class EpisodeFormatRuleHelper:
template: str,
file_name: str,
context: str,
) -> Optional[parse.Result]:
) -> Optional[_TemplateParseResult]:
try:
return parse.parse(template, file_name)
return _match_template(template, file_name)
except Exception as err:
logger.warn(f"{context} parse 模板解析失败:{template} <- {file_name} - {err}")
return None

View File

@@ -68,6 +68,39 @@ def test_locate_episode_supports_hash_prefix():
assert file_name[start - 1] == "#"
@pytest.mark.parametrize(
"file_name",
[
"Show 01《Title》.mkv",
"Show 01〈Title〉.mkv",
"Show 01Title.mkv",
],
)
def test_locate_episode_supports_east_asian_title_quotes(file_name: str):
    """The located span must cover exactly the "01" token of each file name."""
    helper = EpisodeFormatRuleHelper()

    span = helper._locate_episode(file_name, "01")
    begin, stop = span

    assert file_name[begin:stop] == "01"
def test_auto_recommend_supports_episode_before_japanese_quote():
    """An episode number directly followed by a 「…」 title yields a template."""
    helper = EpisodeFormatRuleHelper()
    file_names = [
        "[U2-Rip]バカとテストと召喚獣 01「バカとクラスと召喚戦爭」(BD 1920x1080 x264 FLACx2).mkv",
        "[U2-Rip]バカとテストと召喚獣 02「ユリとバラと保健体育」(BD 1920x1080 x264 FLACx2).mkv",
        "[U2-Rip]バカとテストと召喚獣 03「食費とデートとスタンガン」(BD 1920x1080 x264 FLACx2).mkv",
    ]
    files = [_make_file(name) for name in file_names]

    ok, message, payload = helper.recommend([], files)

    assert ok is True
    assert message == ""
    assert payload["episode_format"] == "[U2-Rip]バカとテストと召喚獣 {ep}{a}」(BD 1920x1080 x264 FLACx2).mkv"
    assert payload["sample_count"] == 3
    assert payload["majority_count"] == 3
def test_auto_recommend_returns_low_confidence_for_single_sample():
helper = EpisodeFormatRuleHelper()
sample = _make_file("[Seed-Raws] Tari Tari - 01 (BD 1280x720 AVC AAC).mp4")
@@ -200,7 +233,7 @@ def test_auto_recommend_returns_false_when_parse_raises(monkeypatch):
def _raise_parse(*args, **kwargs):
raise ValueError("broken parse")
monkeypatch.setattr("app.helper.format.parse.parse", _raise_parse)
monkeypatch.setattr("app.helper.format._match_template", _raise_parse)
state, errmsg, data = helper.recommend([], samples)
@@ -350,6 +383,47 @@ def test_auto_recommend_ignores_special_sp_samples():
assert data["episode_format"] == "[Tonikaku Kawaii S2][{ep}][BDRIP][1080P][H264_FLAC].mkv"
def test_auto_recommend_ignores_promotional_special_samples():
    """PV / CM / trailer style extras must not count as episode samples."""
    helper = EpisodeFormatRuleHelper()
    file_names = [
        "[VCB-Studio] Show [01][Ma10p_1080p][x265_flac].mkv",
        "[VCB-Studio] Show [02][Ma10p_1080p][x265_flac].mkv",
        "[VCB-Studio] Show [03][Ma10p_1080p][x265_flac].mkv",
        "[VCB-Studio] Show [PV01][Ma10p_1080p][x265_flac].mkv",
        "[VCB-Studio] Show [CM01][Ma10p_1080p][x265_flac].mkv",
        "[VCB-Studio] Show [Trailer][Ma10p_1080p][x265_flac].mkv",
        "[VCB-Studio] Show [Web Preview 02][Ma10p_1080p][x265_flac].mkv",
        "[VCB-Studio] Show [Series Review][Ma10p_1080p][x265_flac].mkv",
        "[VCB-Studio] Show [Table Game][Ma10p_1080p][x265_flac].mkv",
    ]
    files = []
    for file_name in file_names:
        files.append(_make_file(file_name))

    ok, message, payload = helper.recommend([], files)

    assert ok is True
    assert message == ""
    assert payload["sample_count"] == 3
    assert payload["majority_count"] == 3
    assert payload["episode_format"] == "[VCB-Studio] Show [{ep}][Ma10p_1080p][x265_flac].mkv"
def test_auto_recommend_rejects_when_valid_media_coverage_is_too_low():
    """Two numbered files out of four must be rejected for low coverage."""
    helper = EpisodeFormatRuleHelper()
    file_names = [
        "Show - 01.mkv",
        "Show - 02.mkv",
        "Show bonus.mkv",
        "Show extra.mkv",
    ]
    files = []
    for file_name in file_names:
        files.append(_make_file(file_name))

    ok, message, payload = helper.recommend([], files)

    assert ok is False
    assert message == "有效正片样本覆盖率不足,建议补充集数定位规则"
    assert payload is None
def test_auto_recommend_uses_native_episode_as_fallback(monkeypatch):
helper = EpisodeFormatRuleHelper()
samples = [
@@ -403,6 +477,180 @@ def test_auto_recommend_rejects_when_native_episode_conflicts(monkeypatch):
assert data is None
def test_auto_recommend_degrades_native_conflict_for_title_sequence_number(monkeypatch):
    """A native episode equal to the title's sequel number must not veto."""
    helper = EpisodeFormatRuleHelper()
    file_names = [
        "[VCB-Studio] Getsuyoubi no Tawawa 2 [01][Ma10p_1080p][x265_flac].mkv",
        "[VCB-Studio] Getsuyoubi no Tawawa 2 [02][Ma10p_1080p][x265_flac].mkv",
    ]
    files = [_make_file(file_name) for file_name in file_names]

    # The native recogniser always reports the sequel number from the title.
    monkeypatch.setattr(helper, "_extract_native_episode", lambda item: "2")

    ok, message, payload = helper.recommend([], files)

    expected_format = "[VCB-Studio] Getsuyoubi no Tawawa 2 [{ep}][Ma10p_1080p][x265_flac].mkv"
    assert ok is True
    assert message == ""
    assert payload["native_conflict_count"] == 0
    assert payload["episode_format"] == expected_format
def test_auto_recommend_prefers_bracket_episode_over_title_sequence_native(monkeypatch):
    """With anitopy returning nothing, the bracketed number wins over native."""
    helper = EpisodeFormatRuleHelper()
    file_names = [
        "[VCB-Studio] Kono Subarashii Sekai ni Shukufuku wo! 3 [01][Ma10p_1080p][x265_flac_aac].mkv",
        "[VCB-Studio] Kono Subarashii Sekai ni Shukufuku wo! 3 [02][Ma10p_1080p][x265_flac_aac].mkv",
    ]
    files = [_make_file(file_name) for file_name in file_names]

    # anitopy yields no episode at all; native reports the title's "3".
    monkeypatch.setattr("app.helper.format.anitopy.parse", lambda _: {})
    monkeypatch.setattr(helper, "_extract_native_episode", lambda item: "3")

    ok, message, payload = helper.recommend([], files)

    expected_format = (
        "[VCB-Studio] Kono Subarashii Sekai ni Shukufuku wo! 3 [{ep}][Ma10p_1080p][x265_flac_aac].mkv"
    )
    assert ok is True
    assert message == ""
    assert payload["native_conflict_count"] == 0
    assert payload["episode_format"] == expected_format
def test_auto_recommend_corrects_anitopy_title_sequence_bias(monkeypatch):
    """anitopy answering "3" for most files must not break the recommendation."""
    helper = EpisodeFormatRuleHelper()
    files = []
    for index in range(1, 12):
        files.append(
            _make_file(f"[VCB-Studio] Kono Subarashii Sekai ni Shukufuku wo! 3 [{index:02d}][Ma10p_1080p][x265_flac_aac].mkv")
        )

    def _mock_parse(file_name: str):
        # Text inside the second pair of square brackets is the episode token.
        after_second_bracket = file_name.split("[", 2)[2]
        token = after_second_bracket.split("]", 1)[0]
        reported = token if token in {"01", "02"} else "3"
        return {"episode_number": reported}

    monkeypatch.setattr("app.helper.format.anitopy.parse", _mock_parse)
    monkeypatch.setattr(helper, "_extract_native_episode", lambda item: "3")

    ok, message, payload = helper.recommend([], files)

    expected_format = (
        "[VCB-Studio] Kono Subarashii Sekai ni Shukufuku wo! 3 [{ep}][Ma10p_1080p][x265_flac_aac].mkv"
    )
    assert ok is True
    assert message == ""
    assert payload["majority_count"] == 11
    assert payload["episode_format"] == expected_format
def test_auto_recommend_supports_optional_finale_suffix():
    """A trailing "Fin" on one file must become an optional placeholder."""
    helper = EpisodeFormatRuleHelper()
    file_names = [
        "[SHIGURE] Rakudai Kishi no Cavalry - 01 [1080p][AVC_Hi10p_FLAC].mkv",
        "[SHIGURE] Rakudai Kishi no Cavalry - 02 [1080p][AVC_Hi10p_FLAC].mkv",
        "[SHIGURE] Rakudai Kishi no Cavalry - 12 Fin [1080p][AVC_Hi10p_FLAC].mkv",
    ]
    files = [_make_file(file_name) for file_name in file_names]

    ok, message, payload = helper.recommend([], files)

    expected_format = "[SHIGURE] Rakudai Kishi no Cavalry - {ep} {a}[1080p][AVC_Hi10p_FLAC].mkv"
    assert ok is True
    assert message == ""
    assert payload["episode_format"] == expected_format

    # The recommended template has to match every input file.
    parser = FormatParser(eformat=payload["episode_format"])
    for file_name in file_names:
        assert parser.match(file_name) is True
def test_should_prefer_fallback_episode_when_anitopy_hits_title_sequence():
    """The fallback wins only when it differs and sits after anitopy's hit."""
    helper = EpisodeFormatRuleHelper()

    title_number_case = helper._should_prefer_fallback_episode(
        "[VCB-Studio] Kono Subarashii Sekai ni Shukufuku wo! 3 [04][Ma10p_1080p][x265_flac_aac].mkv",
        "3",
        "04",
    )
    assert title_number_case is True

    same_episode_case = helper._should_prefer_fallback_episode(
        "Show - 04.mkv",
        "04",
        "04",
    )
    assert same_episode_case is False
def test_should_prefer_fallback_episode_preserves_anitopy_multi_episode_list():
    """A multi-episode list from anitopy is never replaced by the fallback."""
    helper = EpisodeFormatRuleHelper()

    prefer_fallback = helper._should_prefer_fallback_episode(
        "Show 3 [01][02].mkv",
        ["01", "02"],
        "02",
    )

    assert prefer_fallback is False
def test_extract_episode_with_native_fallback_keeps_anitopy_range_list(monkeypatch):
    """An anitopy episode list is normalised to a range and kept as the result."""
    helper = EpisodeFormatRuleHelper()
    item = _make_file("Show - 01-02 [02].mkv")

    monkeypatch.setattr(
        "app.helper.format.anitopy.parse",
        lambda _: {"episode_number": ["01", "02"]},
    )
    monkeypatch.setattr(helper, "_extract_native_episode", lambda _: None)

    extracted = helper._extract_episode_with_native_fallback(item)
    normalized_episode, native_episode, used_native_fallback, native_verified = extracted

    assert normalized_episode == "01-02"
    assert native_episode is None
    assert used_native_fallback is False
    assert native_verified is False
def test_should_degrade_native_conflict_only_for_preceding_title_number():
    """Only a native number located before the episode token is downgraded."""
    helper = EpisodeFormatRuleHelper()

    preceding_title_number = helper._should_degrade_native_conflict(
        "[VCB-Studio] Getsuyoubi no Tawawa 2 [01][Ma10p_1080p][x265_flac].mkv",
        "01",
        "2",
    )
    assert preceding_title_number is True

    native_not_in_name = helper._should_degrade_native_conflict(
        "Show - 01.mkv",
        "01",
        "02",
    )
    assert native_not_in_name is False
def test_calculate_variable_span_keeps_optional_base_suffix_span():
    """A base part compared against an empty part keeps its full span."""
    helper = EpisodeFormatRuleHelper()

    span = helper._calculate_variable_span(" Fin ", [""])

    assert span == (0, 5)
def test_auto_recommend_marks_native_verified_samples(monkeypatch):
helper = EpisodeFormatRuleHelper()
samples = [