From b395d820d8c3ecac0dbd5f6ce1ce1136d533c24d Mon Sep 17 00:00:00 2001 From: Album <51018113+Mister-album@users.noreply.github.com> Date: Sun, 21 Jun 2026 07:34:57 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E5=AE=8C=E5=96=84=E9=9B=86=E6=95=B0?= =?UTF-8?q?=E5=AE=9A=E4=BD=8D=E6=A8=A1=E6=9D=BF=E8=AF=86=E5=88=AB=E4=B8=8E?= =?UTF-8?q?=E8=87=AA=E5=8A=A8=E6=8E=A8=E8=8D=90=E7=A8=B3=E5=AE=9A=E6=80=A7?= =?UTF-8?q?=20(#5978)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/helper/format.py | 304 +++++++++++++++++++++++++--- tests/test_episode_format_helper.py | 250 ++++++++++++++++++++++- 2 files changed, 524 insertions(+), 30 deletions(-) diff --git a/app/helper/format.py b/app/helper/format.py index 52bf9dc7..9b9c7b74 100644 --- a/app/helper/format.py +++ b/app/helper/format.py @@ -1,11 +1,11 @@ import re from collections import defaultdict from dataclasses import dataclass +from functools import lru_cache from pathlib import Path -from typing import Dict, Iterable, List, Match, Optional, Tuple +from typing import Dict, Iterable, List, Match, Optional, Tuple, Union import anitopy -import parse from app.core.config import settings from app.core.metainfo import MetaInfoPath @@ -14,6 +14,70 @@ from app.log import logger from app.schemas import EpisodeFormatRule, FileItem +@dataclass(frozen=True) +class _TemplateParseResult: + named: Dict[str, str] + spans: Dict[str, Tuple[int, int]] + + +@lru_cache(maxsize=256) +def _compile_template_pattern( + template: str, + ep_group_name: Optional[str] = None, +): + parts: List[str] = ["^"] + cursor = 0 + while cursor < len(template): + if template.startswith("{{", cursor): + parts.append(re.escape("{")) + cursor += 2 + continue + if template.startswith("}}", cursor): + parts.append(re.escape("}")) + cursor += 2 + continue + if template[cursor] == "{": + end = template.find("}", cursor + 1) + if end < 0: + raise ValueError(f"模板存在未闭合占位符:{template}") + group_name = template[cursor + 1:end] + if not re.fullmatch(r"[A-Za-z_]\w*", group_name): + raise ValueError(f"模板占位符名称无效:{template}") + quantifier = ".+?" if group_name == ep_group_name else ".*?" + parts.append(f"(?P<{group_name}>{quantifier})") + cursor = end + 1 + continue + if template[cursor] == "}": + raise ValueError(f"模板存在未转义的右花括号:{template}") + + literal_end = cursor + while literal_end < len(template) and template[literal_end] not in "{}": + literal_end += 1 + parts.append(re.escape(template[cursor:literal_end])) + cursor = literal_end + parts.append("$") + return re.compile("".join(parts)) + + +def _match_template( + template: str, + text: str, + ep_group_name: Optional[str] = None, +) -> Optional[_TemplateParseResult]: + pattern = _compile_template_pattern(template, ep_group_name) + result = pattern.match(text) + if not result: + return None + group_names = result.groupdict() + return _TemplateParseResult( + named=group_names, + spans={ + group_name: result.span(group_name) + for group_name in group_names + }, + ) + + class FormatParser(object): _key = "" _split_chars = r"\.|\s+|\(|\)|\[|]|-|\+|【|】|/|~|;|&|\||#|_|「|」|~" @@ -41,6 +105,11 @@ class FormatParser(object): self.__offset = f"EP+{offset}" self._key = key self._part = None + self._compiled_pattern = ( + _compile_template_pattern(self._format, self._key) + if self._format + else None + ) if part: self._part = part if details: @@ -130,10 +199,10 @@ class FormatParser(object): """ if not self._format: return None, None - ret = parse.parse(self._format, file) - if not ret or not ret.__contains__(self._key): + ret = self._compiled_pattern.match(file) if self._compiled_pattern else None + if not ret or self._key not in ret.groupdict(): return None, None - episodes = ret.__getitem__(self._key) + episodes = ret.group(self._key) if not re.compile( r"^([Ee][Pp]?)?(\d{1,4})(-([Ee][Pp]?)?(\d{1,4}))?$", re.IGNORECASE, @@ -165,6 +234,7 @@ class EpisodeFormatRuleHelper: """ _MIN_MEDIA_FILE_SIZE_BYTES = 100 * 1024 * 1024 + _MIN_AUTO_VALID_MEDIA_COVERAGE = 0.6 _EMPTY_META = MetaBase(title="") _EP_RANGE_RE = re.compile( @@ -177,12 +247,29 @@ class EpisodeFormatRuleHelper: _SEASON_EP_RE = re.compile(r"[Ss]\d{1,4}[Ee][Pp]?(\d{1,4})(?!\d)") _HASH_EP_RE = re.compile(r"(? 1 + and valid_media_count / candidate_media_count + < self._MIN_AUTO_VALID_MEDIA_COVERAGE + ): + logger.warn( + "有效正片样本覆盖率不足,放弃智能生成:" + f"valid_media={valid_media_count}, candidate_media={candidate_media_count}" + ) + return False, "有效正片样本覆盖率不足,建议补充集数定位规则", None + majority_samples, clear_majority = self._select_base_samples(valid_samples) + logger.debug( + "自动推荐多数派样本:" + f"valid={len(valid_samples)}, majority={len(majority_samples)}, " + f"clear_majority={clear_majority}, files=" + f"{[(sample.file_name, sample.expected_episode, sample.ep_span) for sample in majority_samples]}" + ) if len(valid_samples) > 1 and not clear_majority: logger.warn("自动生成样本未形成明确多数派,放弃推荐") return False, "样本命名差异过大,建议补充集数定位规则", None @@ -328,10 +445,19 @@ class EpisodeFormatRuleHelper: episode_format = self._build_ep_only_template( majority_names, majority_spans, use_majority=False ) + logger.debug( + "自动推荐基础模板:" + f"sample={majority_names[0] if majority_names else None}, " + f"span={majority_spans[0] if majority_spans else None}, template={episode_format}" + ) if not self._validate_auto_template(episode_format, majority_samples): diff_result = self._build_template_with_diff( majority_names, majority_spans, use_majority=False ) + logger.debug( + "自动推荐差异模板尝试:" + f"base={episode_format}, diff={diff_result}" + ) if diff_result and self._validate_auto_template( diff_result, majority_samples ): @@ -495,8 +621,18 @@ class EpisodeFormatRuleHelper: ep_span = self._locate_episode(file_name, normalized_episode) if ep_span is None: + logger.debug( + "自动推荐样本跳过:未定位到集数 token - " + f"{file_name} - episode={normalized_episode}" + ) continue + logger.debug( + "自动推荐样本入选:" + f"{file_name} - episode={normalized_episode}, span={ep_span}, " + f"matched={file_name[ep_span[0]:ep_span[1]]}, " + f"kind={self._get_file_kind(item)}" + ) valid_samples.append( _AutoRecommendSample( file_name=file_name, @@ -538,9 +674,9 @@ class EpisodeFormatRuleHelper: for candidate in cls._build_episode_candidates(normalized_episode_value): token_pattern = re.compile( - rf"(?:(?<=^)|(?<=[\s._\-\[\]【】()]))" + rf"(?:(?<=^)|(?<=[\s._\-\[\]【】()「」『』《》〈〉〔〕]))" rf"{re.escape(candidate)}" - rf"(?:(?=$)|(?=[\s._\-\[\]【】()]))" + rf"(?:(?=$)|(?=[\s._\-\[\]【】()「」『』《》〈〉〔〕]))" ) matches = list(token_pattern.finditer(file_name)) if matches: @@ -629,6 +765,59 @@ class EpisodeFormatRuleHelper: return f"{meta.begin_episode}-{meta.end_episode}" return str(meta.begin_episode) + @classmethod + def _should_degrade_native_conflict( + cls, + file_name: str, + normalized_episode: Optional[str], + native_episode: Optional[str], + ) -> bool: + """ + 判断原生集数冲突是否应降级处理。 + + 当自动定位到的集数 token 明确出现在文件名后部,而原生识别出来的数字 + 只出现在更靠前的位置时,通常是标题续作号或目录序号误判,不应继续作 + 为自动推荐的否决条件。 + """ + if not file_name or not normalized_episode or not native_episode: + return False + + auto_span = cls._locate_episode(file_name, normalized_episode) + native_span = cls._locate_episode(file_name, native_episode) + if not auto_span or not native_span: + return False + return native_span[1] <= auto_span[0] + + @classmethod + def _should_prefer_fallback_episode( + cls, + file_name: str, + anitopy_episode: Optional[Union[str, List[str]]], + fallback_episode: Optional[Union[str, List[str]]], + ) -> bool: + """ + 当 anitopy 命中了标题前部数字,而 fallback 命中了更靠后的显式集数 token 时, + 优先使用 fallback 结果。 + """ + if not file_name or not anitopy_episode or not fallback_episode: + return False + normalized_anitopy_episode = cls._normalize_episode_value(anitopy_episode) + normalized_fallback_episode = cls._normalize_episode_value(fallback_episode) + if cls._episode_value_equals( + normalized_anitopy_episode, + normalized_fallback_episode, + ): + return False + _, anitopy_end_episode = cls._parse_episode_value(normalized_anitopy_episode) + if anitopy_end_episode is not None: + return False + + anitopy_span = cls._locate_episode(file_name, normalized_anitopy_episode) + fallback_span = cls._locate_episode(file_name, normalized_fallback_episode) + if not anitopy_span or not fallback_span: + return False + return anitopy_span[1] <= fallback_span[0] + def _extract_episode_with_native_fallback( self, item: FileItem, @@ -636,23 +825,48 @@ class EpisodeFormatRuleHelper: file_name = item.name or "" native_episode = self._extract_native_episode(item) episode_number = None + anitopy_episode = None try: result = anitopy.parse(file_name) episode_number = result.get("episode_number") + anitopy_episode = episode_number except Exception as err: logger.warn(f"anitopy 解析失败:{file_name} - {err}") + fallback_episode = self._extract_episode_fallback(file_name) if not episode_number: - episode_number = self._extract_episode_fallback(file_name) + episode_number = fallback_episode + elif self._should_prefer_fallback_episode( + file_name, + anitopy_episode, + fallback_episode, + ): + episode_number = fallback_episode normalized_episode = ( self._normalize_episode_value(episode_number) if episode_number else None ) + logger.debug( + "自动推荐集数提取:" + f"{file_name} - anitopy={anitopy_episode}, " + f"fallback={fallback_episode}, normalized={normalized_episode}, " + f"native={native_episode}" + ) used_native_fallback = False native_verified = False if normalized_episode and native_episode: if self._episode_value_equals(normalized_episode, native_episode): native_verified = True + elif self._should_degrade_native_conflict( + file_name, + normalized_episode, + native_episode, + ): + logger.info( + "原生集数识别疑似命中标题序号,降级冲突权重:" + f"{file_name} - auto={normalized_episode}, native={native_episode}" + ) + native_episode = None else: return normalized_episode, native_episode, False, False elif not normalized_episode and native_episode: @@ -663,8 +877,22 @@ class EpisodeFormatRuleHelper: @classmethod def _extract_episode_fallback(cls, file_name: str) -> Optional[str]: """ - anitopy 无法识别时的兜底集数提取(第xx話 / 第xx话 / 。01 等) + anitopy 无法识别时的兜底集数提取。 + + 优先尝试结构更明确的季集/井号/方括号集数,再退回到中日韩常见文案。 """ + match = cls._SEASON_EP_RANGE_RE.search(file_name) + if match: + return match.group(1) + match = cls._SEASON_EP_RE.search(file_name) + if match: + return match.group(1) + hash_matches = list(cls._HASH_EP_RE.finditer(file_name)) + if hash_matches: + return hash_matches[-1].group(1) + bracket_matches = list(cls._FALLBACK_BRACKET_EP_RE.finditer(file_name)) + if bracket_matches: + return bracket_matches[-1].group(1) match = cls._FALLBACK_EPISODE_RE.search(file_name) if match: return match.group(1) @@ -926,29 +1154,33 @@ class EpisodeFormatRuleHelper: candidates = [base_text] + compare_texts prefix_len = self._common_prefix_length(candidates) suffix_len = self._common_suffix_length(candidates, prefix_len) - - variable_parts = [ - text[ - prefix_len: - len(text) - suffix_len if suffix_len else len(text) - ] - for text in candidates - ] - while prefix_len > 0 and any(not part for part in variable_parts): - prefix_len -= 1 - variable_parts = [ + end_pos = len(base_text) - suffix_len + if prefix_len >= end_pos: + base_part = base_text[prefix_len:end_pos] + compare_parts = [ text[ prefix_len: len(text) - suffix_len if suffix_len else len(text) ] - for text in candidates + for text in compare_texts ] - - if any(not part for part in variable_parts): + if not base_part and any(compare_parts): + return prefix_len, prefix_len return None - end_pos = len(base_text) - suffix_len - if prefix_len >= end_pos: + base_part = base_text[prefix_len:end_pos] + compare_parts = [ + text[ + prefix_len: + len(text) - suffix_len if suffix_len else len(text) + ] + for text in compare_texts + ] + if any(not part for part in [base_part] + compare_parts): + if not base_part and any(compare_parts): + return prefix_len, prefix_len + if base_part and any(part == "" for part in compare_parts): + return prefix_len, end_pos return None return prefix_len, end_pos @@ -989,7 +1221,7 @@ class EpisodeFormatRuleHelper: template_parts: List[str] = [] cursor = 0 for start, end, name in sorted(spans, key=lambda item: item[0]): - if start < cursor or end <= start: + if start < cursor or end < start: continue template_parts.append( self._escape_literal(base_after_ep[cursor:start]) @@ -1021,6 +1253,10 @@ class EpisodeFormatRuleHelper: sample.file_name, context="自动模板校验", ): + logger.debug( + "自动模板校验失败:模板未命中文件 - " + f"template={episode_format}, file={sample.file_name}" + ) return False start_episode, end_episode, _ = self._safe_split_episode( parser, @@ -1032,12 +1268,22 @@ class EpisodeFormatRuleHelper: end_episode, sample.expected_episode, ): + logger.debug( + "自动模板校验失败:集数不匹配 - " + f"template={episode_format}, file={sample.file_name}, " + f"expected={sample.expected_episode}, actual={start_episode}-{end_episode}" + ) return False if sample.native_episode and not self._episode_matches( start_episode, end_episode, sample.native_episode, ): + logger.debug( + "自动模板校验失败:与原生集数不一致 - " + f"template={episode_format}, file={sample.file_name}, " + f"native={sample.native_episode}, actual={start_episode}-{end_episode}" + ) return False return True @@ -1188,9 +1434,9 @@ class EpisodeFormatRuleHelper: template: str, file_name: str, context: str, - ) -> Optional[parse.Result]: + ) -> Optional[_TemplateParseResult]: try: - return parse.parse(template, file_name) + return _match_template(template, file_name) except Exception as err: logger.warn(f"{context} parse 模板解析失败:{template} <- {file_name} - {err}") return None diff --git a/tests/test_episode_format_helper.py b/tests/test_episode_format_helper.py index 0a9b572e..5975028a 100644 --- a/tests/test_episode_format_helper.py +++ b/tests/test_episode_format_helper.py @@ -68,6 +68,39 @@ def test_locate_episode_supports_hash_prefix(): assert file_name[start - 1] == "#" +@pytest.mark.parametrize( + "file_name", + [ + "Show 01《Title》.mkv", + "Show 01〈Title〉.mkv", + "Show 01〔Title〕.mkv", + ], +) +def test_locate_episode_supports_east_asian_title_quotes(file_name: str): + helper = EpisodeFormatRuleHelper() + + start, end = helper._locate_episode(file_name, "01") + + assert file_name[start:end] == "01" + + +def test_auto_recommend_supports_episode_before_japanese_quote(): + helper = EpisodeFormatRuleHelper() + samples = [ + _make_file("[U2-Rip]バカとテストと召喚獣 01「バカとクラスと召喚戦爭」(BD 1920x1080 x264 FLACx2).mkv"), + _make_file("[U2-Rip]バカとテストと召喚獣 02「ユリとバラと保健体育」(BD 1920x1080 x264 FLACx2).mkv"), + _make_file("[U2-Rip]バカとテストと召喚獣 03「食費とデートとスタンガン」(BD 1920x1080 x264 FLACx2).mkv"), + ] + + state, errmsg, data = helper.recommend([], samples) + + assert state is True + assert errmsg == "" + assert data["episode_format"] == "[U2-Rip]バカとテストと召喚獣 {ep}「{a}」(BD 1920x1080 x264 FLACx2).mkv" + assert data["sample_count"] == 3 + assert data["majority_count"] == 3 + + def test_auto_recommend_returns_low_confidence_for_single_sample(): helper = EpisodeFormatRuleHelper() sample = _make_file("[Seed-Raws] Tari Tari - 01 (BD 1280x720 AVC AAC).mp4") @@ -200,7 +233,7 @@ def test_auto_recommend_returns_false_when_parse_raises(monkeypatch): def _raise_parse(*args, **kwargs): raise ValueError("broken parse") - monkeypatch.setattr("app.helper.format.parse.parse", _raise_parse) + monkeypatch.setattr("app.helper.format._match_template", _raise_parse) state, errmsg, data = helper.recommend([], samples) @@ -350,6 +383,47 @@ def test_auto_recommend_ignores_special_sp_samples(): assert data["episode_format"] == "[Tonikaku Kawaii S2][{ep}][BDRIP][1080P][H264_FLAC].mkv" +def test_auto_recommend_ignores_promotional_special_samples(): + helper = EpisodeFormatRuleHelper() + sample_names = [ + "[VCB-Studio] Show [01][Ma10p_1080p][x265_flac].mkv", + "[VCB-Studio] Show [02][Ma10p_1080p][x265_flac].mkv", + "[VCB-Studio] Show [03][Ma10p_1080p][x265_flac].mkv", + "[VCB-Studio] Show [PV01][Ma10p_1080p][x265_flac].mkv", + "[VCB-Studio] Show [CM01][Ma10p_1080p][x265_flac].mkv", + "[VCB-Studio] Show [Trailer][Ma10p_1080p][x265_flac].mkv", + "[VCB-Studio] Show [Web Preview 02][Ma10p_1080p][x265_flac].mkv", + "[VCB-Studio] Show [Series Review][Ma10p_1080p][x265_flac].mkv", + "[VCB-Studio] Show [Table Game][Ma10p_1080p][x265_flac].mkv", + ] + samples = [_make_file(name) for name in sample_names] + + state, errmsg, data = helper.recommend([], samples) + + assert state is True + assert errmsg == "" + assert data["sample_count"] == 3 + assert data["majority_count"] == 3 + assert data["episode_format"] == "[VCB-Studio] Show [{ep}][Ma10p_1080p][x265_flac].mkv" + + +def test_auto_recommend_rejects_when_valid_media_coverage_is_too_low(): + helper = EpisodeFormatRuleHelper() + sample_names = [ + "Show - 01.mkv", + "Show - 02.mkv", + "Show bonus.mkv", + "Show extra.mkv", + ] + samples = [_make_file(name) for name in sample_names] + + state, errmsg, data = helper.recommend([], samples) + + assert state is False + assert errmsg == "有效正片样本覆盖率不足,建议补充集数定位规则" + assert data is None + + def test_auto_recommend_uses_native_episode_as_fallback(monkeypatch): helper = EpisodeFormatRuleHelper() samples = [ @@ -403,6 +477,180 @@ def test_auto_recommend_rejects_when_native_episode_conflicts(monkeypatch): assert data is None +def test_auto_recommend_degrades_native_conflict_for_title_sequence_number(monkeypatch): + helper = EpisodeFormatRuleHelper() + samples = [ + _make_file("[VCB-Studio] Getsuyoubi no Tawawa 2 [01][Ma10p_1080p][x265_flac].mkv"), + _make_file("[VCB-Studio] Getsuyoubi no Tawawa 2 [02][Ma10p_1080p][x265_flac].mkv"), + ] + + monkeypatch.setattr( + helper, + "_extract_native_episode", + lambda item: "2", + ) + + state, errmsg, data = helper.recommend([], samples) + + assert state is True + assert errmsg == "" + assert data["native_conflict_count"] == 0 + assert data["episode_format"] == ( + "[VCB-Studio] Getsuyoubi no Tawawa 2 [{ep}][Ma10p_1080p][x265_flac].mkv" + ) + + +def test_auto_recommend_prefers_bracket_episode_over_title_sequence_native(monkeypatch): + helper = EpisodeFormatRuleHelper() + samples = [ + _make_file("[VCB-Studio] Kono Subarashii Sekai ni Shukufuku wo! 3 [01][Ma10p_1080p][x265_flac_aac].mkv"), + _make_file("[VCB-Studio] Kono Subarashii Sekai ni Shukufuku wo! 3 [02][Ma10p_1080p][x265_flac_aac].mkv"), + ] + + monkeypatch.setattr( + "app.helper.format.anitopy.parse", + lambda _: {}, + ) + monkeypatch.setattr( + helper, + "_extract_native_episode", + lambda item: "3", + ) + + state, errmsg, data = helper.recommend([], samples) + + assert state is True + assert errmsg == "" + assert data["native_conflict_count"] == 0 + assert data["episode_format"] == ( + "[VCB-Studio] Kono Subarashii Sekai ni Shukufuku wo! 3 [{ep}][Ma10p_1080p][x265_flac_aac].mkv" + ) + + +def test_auto_recommend_corrects_anitopy_title_sequence_bias(monkeypatch): + helper = EpisodeFormatRuleHelper() + samples = [ + _make_file(f"[VCB-Studio] Kono Subarashii Sekai ni Shukufuku wo! 3 [{index:02d}][Ma10p_1080p][x265_flac_aac].mkv") + for index in range(1, 12) + ] + + def _mock_parse(file_name: str): + episode_text = file_name.split("[", 2)[2].split("]", 1)[0] + if episode_text in {"01", "02"}: + return {"episode_number": episode_text} + return {"episode_number": "3"} + + monkeypatch.setattr( + "app.helper.format.anitopy.parse", + _mock_parse, + ) + monkeypatch.setattr( + helper, + "_extract_native_episode", + lambda item: "3", + ) + + state, errmsg, data = helper.recommend([], samples) + + assert state is True + assert errmsg == "" + assert data["majority_count"] == 11 + assert data["episode_format"] == ( + "[VCB-Studio] Kono Subarashii Sekai ni Shukufuku wo! 3 [{ep}][Ma10p_1080p][x265_flac_aac].mkv" + ) + + +def test_auto_recommend_supports_optional_finale_suffix(): + helper = EpisodeFormatRuleHelper() + sample_names = [ + "[SHIGURE] Rakudai Kishi no Cavalry - 01 [1080p][AVC_Hi10p_FLAC].mkv", + "[SHIGURE] Rakudai Kishi no Cavalry - 02 [1080p][AVC_Hi10p_FLAC].mkv", + "[SHIGURE] Rakudai Kishi no Cavalry - 12 Fin [1080p][AVC_Hi10p_FLAC].mkv", + ] + samples = [_make_file(name) for name in sample_names] + + state, errmsg, data = helper.recommend([], samples) + + assert state is True + assert errmsg == "" + assert data["episode_format"] == ( + "[SHIGURE] Rakudai Kishi no Cavalry - {ep} {a}[1080p][AVC_Hi10p_FLAC].mkv" + ) + parser = FormatParser(eformat=data["episode_format"]) + for sample_name in sample_names: + assert parser.match(sample_name) is True + + +def test_should_prefer_fallback_episode_when_anitopy_hits_title_sequence(): + helper = EpisodeFormatRuleHelper() + + assert helper._should_prefer_fallback_episode( + "[VCB-Studio] Kono Subarashii Sekai ni Shukufuku wo! 3 [04][Ma10p_1080p][x265_flac_aac].mkv", + "3", + "04", + ) is True + assert helper._should_prefer_fallback_episode( + "Show - 04.mkv", + "04", + "04", + ) is False + + +def test_should_prefer_fallback_episode_preserves_anitopy_multi_episode_list(): + helper = EpisodeFormatRuleHelper() + + assert helper._should_prefer_fallback_episode( + "Show 3 [01][02].mkv", + ["01", "02"], + "02", + ) is False + + +def test_extract_episode_with_native_fallback_keeps_anitopy_range_list(monkeypatch): + helper = EpisodeFormatRuleHelper() + item = _make_file("Show - 01-02 [02].mkv") + + monkeypatch.setattr( + "app.helper.format.anitopy.parse", + lambda _: {"episode_number": ["01", "02"]}, + ) + monkeypatch.setattr( + helper, + "_extract_native_episode", + lambda _: None, + ) + + normalized_episode, native_episode, used_native_fallback, native_verified = ( + helper._extract_episode_with_native_fallback(item) + ) + + assert normalized_episode == "01-02" + assert native_episode is None + assert used_native_fallback is False + assert native_verified is False + + +def test_should_degrade_native_conflict_only_for_preceding_title_number(): + helper = EpisodeFormatRuleHelper() + + assert helper._should_degrade_native_conflict( + "[VCB-Studio] Getsuyoubi no Tawawa 2 [01][Ma10p_1080p][x265_flac].mkv", + "01", + "2", + ) is True + assert helper._should_degrade_native_conflict( + "Show - 01.mkv", + "01", + "02", + ) is False + + +def test_calculate_variable_span_keeps_optional_base_suffix_span(): + helper = EpisodeFormatRuleHelper() + + assert helper._calculate_variable_span(" Fin ", [""]) == (0, 5) + + def test_auto_recommend_marks_native_verified_samples(monkeypatch): helper = EpisodeFormatRuleHelper() samples = [