Files
MoviePilot/tests/test_http_encoding.py
2026-06-26 06:37:18 +08:00

66 lines
2.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from types import SimpleNamespace
from app.utils.http import RequestUtils
def test_xml_decoding_prefers_xml_declaration_over_http_default():
"""
XML 声明应优先于 HTTP 默认编码,避免 UTF-8 RSS 标题被 latin1 类编码解坏。
"""
xml = '<?xml version="1.0" encoding="UTF-8"?><rss><title>警察故事4简单任务</title></rss>'
response = SimpleNamespace(
content=xml.encode("utf-8"),
encoding="ISO-8859-1",
apparent_encoding="utf-8",
text=xml.encode("utf-8").decode("ISO-8859-1"),
)
decoded = RequestUtils.get_decoded_xml_content(response, performance_mode=True)
assert "警察故事4简单任务" in decoded
assert "è­¦" not in decoded
def test_xml_decoding_uses_declared_non_utf8_encoding():
"""
XML 声明为非 UTF-8 时应按声明解码,兼容旧站点的 GBK/Big5 类响应。
"""
xml = '<?xml version="1.0" encoding="GBK"?><rss><title>中文标题</title></rss>'
response = SimpleNamespace(
content=xml.encode("gbk"),
encoding="ISO-8859-1",
apparent_encoding="ISO-8859-1",
text=xml.encode("gbk").decode("ISO-8859-1"),
)
decoded = RequestUtils.get_decoded_xml_content(response, performance_mode=True)
assert "中文标题" in decoded
def test_xml_decoding_skips_low_confidence_apparent_encoding():
"""
apparent_encoding 为 latin1 类编码时不应抢先解码,避免无 XML 声明的中文 RSS 被吞成乱码。
"""
xml = "<rss><title>中文标题</title></rss>"
response = SimpleNamespace(
content=xml.encode("gbk"),
encoding="ISO-8859-1",
apparent_encoding="ISO-8859-1",
text=xml.encode("gbk").decode("ISO-8859-1"),
)
decoded = RequestUtils.get_decoded_xml_content(response, performance_mode=True)
assert "中文标题" in decoded
assert "ÖÐÎÄ" not in decoded
def test_latin1_http_encoding_is_low_confidence():
"""
latin1 类编码常由 HTTP 客户端默认填充,不能作为 XML/RSS 解码的优先依据。
"""
assert RequestUtils.is_low_confidence_http_encoding("ISO-8859-1")
assert RequestUtils.is_low_confidence_http_encoding("latin-1")
assert not RequestUtils.is_low_confidence_http_encoding("utf-8")