mirror of
https://github.com/jxxghp/MoviePilot.git
synced 2026-06-29 00:06:27 +08:00
66 lines
2.2 KiB
Python
66 lines
2.2 KiB
Python
from types import SimpleNamespace
|
||
|
||
from app.utils.http import RequestUtils
|
||
|
||
|
||
def test_xml_decoding_prefers_xml_declaration_over_http_default():
|
||
"""
|
||
XML 声明应优先于 HTTP 默认编码,避免 UTF-8 RSS 标题被 latin1 类编码解坏。
|
||
"""
|
||
xml = '<?xml version="1.0" encoding="UTF-8"?><rss><title>警察故事4:简单任务</title></rss>'
|
||
response = SimpleNamespace(
|
||
content=xml.encode("utf-8"),
|
||
encoding="ISO-8859-1",
|
||
apparent_encoding="utf-8",
|
||
text=xml.encode("utf-8").decode("ISO-8859-1"),
|
||
)
|
||
|
||
decoded = RequestUtils.get_decoded_xml_content(response, performance_mode=True)
|
||
|
||
assert "警察故事4:简单任务" in decoded
|
||
assert "è¦" not in decoded
|
||
|
||
|
||
def test_xml_decoding_uses_declared_non_utf8_encoding():
|
||
"""
|
||
XML 声明为非 UTF-8 时应按声明解码,兼容旧站点的 GBK/Big5 类响应。
|
||
"""
|
||
xml = '<?xml version="1.0" encoding="GBK"?><rss><title>中文标题</title></rss>'
|
||
response = SimpleNamespace(
|
||
content=xml.encode("gbk"),
|
||
encoding="ISO-8859-1",
|
||
apparent_encoding="ISO-8859-1",
|
||
text=xml.encode("gbk").decode("ISO-8859-1"),
|
||
)
|
||
|
||
decoded = RequestUtils.get_decoded_xml_content(response, performance_mode=True)
|
||
|
||
assert "中文标题" in decoded
|
||
|
||
|
||
def test_xml_decoding_skips_low_confidence_apparent_encoding():
|
||
"""
|
||
apparent_encoding 为 latin1 类编码时不应抢先解码,避免无 XML 声明的中文 RSS 被吞成乱码。
|
||
"""
|
||
xml = "<rss><title>中文标题</title></rss>"
|
||
response = SimpleNamespace(
|
||
content=xml.encode("gbk"),
|
||
encoding="ISO-8859-1",
|
||
apparent_encoding="ISO-8859-1",
|
||
text=xml.encode("gbk").decode("ISO-8859-1"),
|
||
)
|
||
|
||
decoded = RequestUtils.get_decoded_xml_content(response, performance_mode=True)
|
||
|
||
assert "中文标题" in decoded
|
||
assert "ÖÐÎÄ" not in decoded
|
||
|
||
|
||
def test_latin1_http_encoding_is_low_confidence():
|
||
"""
|
||
latin1 类编码常由 HTTP 客户端默认填充,不能作为 XML/RSS 解码的优先依据。
|
||
"""
|
||
assert RequestUtils.is_low_confidence_http_encoding("ISO-8859-1")
|
||
assert RequestUtils.is_low_confidence_http_encoding("latin-1")
|
||
assert not RequestUtils.is_low_confidence_http_encoding("utf-8")
|