mirror of
https://github.com/jxxghp/MoviePilot.git
synced 2026-07-03 02:17:19 +08:00
Handle Audiences new message list previews
This commit is contained in:
@@ -24,6 +24,7 @@ class NexusAudiencesSiteUserInfo(NexusPhpSiteUserInfo):
|
||||
self._sys_mail_unread_page = None
|
||||
self.__next_mail_page = 1
|
||||
self.__seen_unread_message_links = set()
|
||||
self.__message_list_previews = {}
|
||||
|
||||
def _parse_message_unread(self, html_text):
|
||||
"""
|
||||
@@ -64,10 +65,8 @@ class NexusAudiencesSiteUserInfo(NexusPhpSiteUserInfo):
|
||||
if not StringUtils.is_valid_html_element(html):
|
||||
return None
|
||||
|
||||
message_links = html.xpath(
|
||||
'//tr[.//img[contains(concat(" ", normalize-space(@class), " "), " unreadpm ") '
|
||||
'or @alt="Unread" or @title="未读"]]/td/a[contains(@href, "viewmessage")]/@href'
|
||||
)
|
||||
message_links = self.__parse_table_unread_message_links(html)
|
||||
message_links.extend(self.__parse_pm_item_unread_message_links(html))
|
||||
new_message_links = self.__filter_new_message_links(message_links)
|
||||
if message_links and not new_message_links:
|
||||
logger.warn(f"{self._site_name} 未读消息页只发现重复消息链接,停止后续翻页")
|
||||
@@ -81,6 +80,30 @@ class NexusAudiencesSiteUserInfo(NexusPhpSiteUserInfo):
|
||||
|
||||
return next_page
|
||||
|
||||
def _parse_message_content(self, html_text):
    """
    Parse the new-style Audiences private-message detail page.

    The title is read from the ``pm-hero__title`` node, the date from the
    ``pm-view__meta`` entry labelled "日期" and the body from the
    ``pm-view__body`` node. When the document is not a valid HTML element,
    or ``__is_empty_message_content`` reports the extracted fields as empty,
    parsing is delegated to the parent implementation.

    :param html_text: raw HTML of the message detail page
    :return: ``(head, date, content)`` from the new layout, otherwise
        whatever the parent parser returns
    """
    html = etree.HTML(html_text)
    try:
        if StringUtils.is_valid_html_element(html):
            parsed = (
                self.__extract_first_text(
                    html,
                    '//*[contains(concat(" ", normalize-space(@class), " "), " pm-hero__title ")]'
                ),
                self.__extract_pm_view_meta(html, "日期"),
                self.__extract_first_text(
                    html,
                    '//*[contains(concat(" ", normalize-space(@class), " "), " pm-view__body ")]'
                ),
            )
            if not self.__is_empty_message_content(*parsed):
                return parsed
    finally:
        # Drop the local reference to the parsed tree before falling back.
        if html is not None:
            del html

    return super()._parse_message_content(html_text)
|
||||
|
||||
def _pase_unread_msgs(self):
|
||||
"""
|
||||
解析 Audiences 未读消息,避免异常分页重复通知和空详情通知。
|
||||
@@ -110,6 +133,7 @@ class NexusAudiencesSiteUserInfo(NexusPhpSiteUserInfo):
|
||||
headers=self._mail_content_headers
|
||||
)
|
||||
)
|
||||
head, date, content = self.__fill_empty_message_content_from_list(msg_link, head, date, content)
|
||||
logger.debug(f"{self._site_name} 标题 {head} 时间 {date} 内容 {content}")
|
||||
if self.__is_empty_message_content(head, date, content):
|
||||
logger.warn(f"{self._site_name} 信息链接 {msg_link} 解析结果为空,跳过消息通知")
|
||||
@@ -137,6 +161,7 @@ class NexusAudiencesSiteUserInfo(NexusPhpSiteUserInfo):
|
||||
"""
|
||||
self.__next_mail_page = 1
|
||||
self.__seen_unread_message_links.clear()
|
||||
self.__message_list_previews.clear()
|
||||
|
||||
def __filter_new_message_links(self, message_links: list) -> list:
|
||||
"""
|
||||
@@ -151,6 +176,118 @@ class NexusAudiencesSiteUserInfo(NexusPhpSiteUserInfo):
|
||||
new_message_links.append(message_link)
|
||||
return new_message_links
|
||||
|
||||
@staticmethod
def __parse_table_unread_message_links(html) -> list:
    """
    Collect unread message links from the legacy table-based Audiences list.

    A table row is treated as unread when it contains an ``img`` whose class
    list includes ``unreadpm``, whose ``alt`` is "Unread" or whose ``title``
    is "未读". The ``href`` of each ``viewmessage`` anchor placed directly in
    a cell of such a row is returned.

    :param html: parsed document exposing ``xpath``
    :return: result of the XPath query (the matching ``href`` values)
    """
    unread_link_query = (
        '//tr[.//img[contains(concat(" ", normalize-space(@class), " "), " unreadpm ") '
        'or @alt="Unread" or @title="未读"]]/td/a[contains(@href, "viewmessage")]/@href'
    )
    return html.xpath(unread_link_query)
|
||||
|
||||
def __parse_pm_item_unread_message_links(self, html) -> list:
    """
    Collect unread message links from the new ``pm-item`` private-message list.

    Rows are first looked up as elements carrying both the ``pm-item-row``
    and ``is-unread`` classes. Only when that finds nothing are rows taken
    from ``pm-item-row`` elements that contain a descendant with the
    ``pm-item__status--unread`` class or a ``title`` of "未读".

    For every row the first ``viewmessage`` href is stripped; non-empty
    links are appended to the result and passed, together with their row,
    to ``__cache_pm_item_preview``.

    :param html: parsed document exposing ``xpath``
    :return: list of stripped unread message links
    """
    rows = html.xpath(
        '//*[contains(concat(" ", normalize-space(@class), " "), " pm-item-row ") '
        'and contains(concat(" ", normalize-space(@class), " "), " is-unread ")]'
    ) or html.xpath(
        '//*[contains(concat(" ", normalize-space(@class), " "), " pm-item-row ") '
        'and .//*[contains(concat(" ", normalize-space(@class), " "), " pm-item__status--unread ") '
        'or @title="未读"]]'
    )

    links = []
    for row in rows:
        hrefs = row.xpath('.//a[contains(@href, "viewmessage")]/@href')
        # Rows without a link, or with a blank one, are ignored.
        link = hrefs[0].strip() if hrefs else ""
        if not link:
            continue
        links.append(link)
        self.__cache_pm_item_preview(link, row)
    return links
|
||||
|
||||
def __cache_pm_item_preview(self, message_link: str, row):
    """
    Remember the list-page preview of one message.

    The subject, time and preview texts of the row are stored as a
    ``(head, date, content)`` tuple in ``__message_list_previews`` under
    the message link resolved against ``_base_url``, so the notification
    can still be built when the detail page cannot be parsed.

    :param message_link: link of the message as found in the list row
    :param row: list row element exposing ``xpath``
    """
    preview = (
        self.__extract_pm_item_text(
            row,
            './/*[contains(concat(" ", normalize-space(@class), " "), " pm-item__subject ")]'
        ),
        self.__extract_pm_item_text(
            row,
            './/*[contains(concat(" ", normalize-space(@class), " "), " pm-item__time ")]'
        ),
        self.__extract_pm_item_text(
            row,
            './/*[contains(concat(" ", normalize-space(@class), " "), " pm-item__preview ")]'
        ),
    )
    cache_key = urljoin(self._base_url, message_link)
    self.__message_list_previews[cache_key] = preview
|
||||
|
||||
@staticmethod
def __extract_pm_item_text(row, xpath: str):
    """
    Return the normalised text of the first node matched inside a list row.

    Whitespace handling is delegated to ``__normalize_text`` so that list
    rows and detail pages are cleaned by one shared routine instead of two
    hand-maintained copies of the same regular expression.

    :param row: list row element exposing ``xpath``
    :param xpath: XPath expression evaluated relative to ``row``
    :return: the cleaned text, or ``None`` when nothing matches or the
        matched node has no visible text
    """
    nodes = row.xpath(xpath)
    if not nodes:
        return None
    return NexusAudiencesSiteUserInfo.__normalize_text(nodes[0].xpath("string(.)"))
|
||||
|
||||
@staticmethod
def __extract_first_text(html, xpath: str):
    """
    Return the normalised text of the first node matching ``xpath``.

    :param html: parsed document or element exposing ``xpath``
    :param xpath: XPath expression to evaluate
    :return: the string value of the first match after ``__normalize_text``,
        or ``None`` when there is no match
    """
    matched = html.xpath(xpath)
    if matched:
        return NexusAudiencesSiteUserInfo.__normalize_text(matched[0].xpath("string(.)"))
    return None
|
||||
|
||||
@staticmethod
def __extract_pm_view_meta(html, label: str):
    """
    Read one labelled metadata value from the new Audiences detail page.

    Looks for a ``pm-view__meta`` element containing a ``pm-view__label``
    whose normalised text equals ``label`` and returns the text of the
    first ``pm-view__value`` beneath that element.

    :param html: parsed document; assumed to be an lxml element, whose
        ``xpath`` accepts XPath variables as keyword arguments
    :param label: visible label text to look up, e.g. "日期"
    :return: the value after ``__normalize_text``, or ``None`` when no
        matching entry exists
    """
    # The label is bound as the XPath variable $label instead of being
    # spliced into the expression, so a label containing quote characters
    # cannot break or change the query.
    values = html.xpath(
        '//*[contains(concat(" ", normalize-space(@class), " "), " pm-view__meta ") '
        'and .//*[contains(concat(" ", normalize-space(@class), " "), " pm-view__label ") '
        'and normalize-space()=$label]]'
        '//*[contains(concat(" ", normalize-space(@class), " "), " pm-view__value ")]',
        label=label,
    )
    if not values:
        return None
    return NexusAudiencesSiteUserInfo.__normalize_text(values[0].xpath("string(.)"))
|
||||
|
||||
@staticmethod
def __normalize_text(text: str):
    """
    Tidy whitespace in text taken from the new Audiences message pages.

    Non-breaking spaces become ordinary spaces, every run of whitespace is
    collapsed to a single space and the ends are trimmed.

    :param text: raw text; may be empty or ``None``
    :return: the cleaned text, or ``None`` when the input is falsy or
        nothing but whitespace remains
    """
    if text:
        collapsed = re.sub(r"\s+", " ", text.replace("\xa0", " "))
        trimmed = collapsed.strip()
        if trimmed:
            return trimmed
    return None
|
||||
|
||||
def __fill_empty_message_content_from_list(self, msg_link: str, head, date, content):
    """
    Fill fields the detail page did not provide from the cached list preview.

    The preview is looked up in ``__message_list_previews`` by the message
    link resolved against ``_base_url``. Each falsy field is replaced by its
    preview counterpart; fields that already hold a value are kept.

    :param msg_link: link of the message being processed
    :param head: title parsed from the detail page, possibly empty
    :param date: date parsed from the detail page, possibly empty
    :param content: body parsed from the detail page, possibly empty
    :return: ``(head, date, content)`` after the fallback has been applied
    """
    cached = self.__message_list_previews.get(urljoin(self._base_url, msg_link))
    if cached:
        fallback_head, fallback_date, fallback_content = cached
        head = head or fallback_head
        date = date or fallback_date
        content = content or fallback_content
    return head, date, content
|
||||
|
||||
def __should_fetch_next_unread_page(self, new_message_links: list) -> bool:
|
||||
"""
|
||||
判断是否还需要继续请求 Audiences 下一页未读消息列表。
|
||||
|
||||
@@ -269,6 +269,129 @@ def test_audiences_readpm_row_is_not_unread_message():
|
||||
assert msg_links == []
|
||||
|
||||
|
||||
def test_audiences_pm_item_unread_links_use_list_preview_when_detail_empty():
    """
    The new div-based Audiences message list must yield its unread rows, and
    the list preview must feed the notification when the detail page cannot
    be parsed.
    """
    parser = NexusAudiencesSiteUserInfo(
        site_name="Audiences",
        url="https://audiences.me/",
        site_cookie="",
        apikey=None,
        token=None,
    )
    parser.message_unread = 7

    def render_unread_row(index):
        """Build the markup of one unread list row."""
        return f"""
        <div class="pm-item-row is-unread">
            <div class="pm-item">
                <input class="pm-item__check" type="checkbox" name="messages[]" value="{4495900 + index}">
                <span class="pm-item__status pm-item__status--unread" title="未读"></span>
                <a class="pm-item__subject"
                   href="messages.php?action=viewmessage&id={4495900 + index}">种子被删除</a>
                <span class="pm-item__user"><i class="fas fa-user" aria-hidden="true"></i>系统</span>
                <span class="pm-item__time">2026-06-22 22:32:11</span>
            </div>
            <div class="pm-item__preview">
                你下载的种子'Wonder Wall S01E{index:02d} 2026 1080p WEB-DL H265 AAC-ADWeb'被管理员删除。
            </div>
        </div>
        """

    unread_rows = "".join(render_unread_row(index) for index in range(1, 8))
    list_html = f"""
    <html>
        <body>
            <form action="messages.php" method="post">
                <div class="pm-list">
                    {unread_rows}
                    <div class="pm-item-row">
                        <div class="pm-item">
                            <span class="pm-item__status" title="已读"></span>
                            <a class="pm-item__subject"
                               href="messages.php?action=viewmessage&id=4419171">已读消息</a>
                            <span class="pm-item__time">2026-06-07 14:27:45</span>
                        </div>
                    </div>
                </div>
            </form>
        </body>
    </html>
    """
    fetched_urls = []

    def fake_get_page_content(url, params=None, headers=None):
        """
        Serve the readable list page, but an empty document for every
        detail page so that detail parsing yields nothing.
        """
        fetched_urls.append(url)
        if "viewmessage" in url:
            return "<html></html>"
        return list_html

    parser._get_page_content = fake_get_page_content

    parser._pase_unread_msgs()

    # Exactly the seven unread rows are followed and reported.
    detail_requests = list(filter(lambda url: "viewmessage" in url, fetched_urls))
    assert len(detail_requests) == 7
    assert len(parser.message_unread_contents) == 7
    expected_first = (
        "种子被删除",
        "2026-06-22 22:32:11",
        "你下载的种子'Wonder Wall S01E01 2026 1080p WEB-DL H265 AAC-ADWeb'被管理员删除。",
    )
    assert parser.message_unread_contents[0] == expected_first
    # The already-read row must never surface as a notification.
    reported_titles = [item[0] for item in parser.message_unread_contents]
    assert "已读消息" not in reported_titles
|
||||
|
||||
|
||||
def test_audiences_pm_view_message_content_is_parsed():
    """
    The new Audiences detail page must yield the title, date and body found
    in its ``pm-view`` markup.
    """
    parser = NexusAudiencesSiteUserInfo(
        site_name="Audiences",
        url="https://audiences.me/",
        site_cookie="",
        apikey=None,
        token=None,
    )
    html_text = """
    <html>
        <body>
            <td class="embedded">
                <div class="pm-page">
                    <div class="pm-hero">
                        <div class="pm-hero__text">
                            <h1 class="pm-hero__title">种子被删除</h1>
                            <p class="pm-hero__sub">自 系统</p>
                        </div>
                    </div>
                    <article class="pm-view">
                        <header class="pm-view__head">
                            <div class="pm-view__meta">
                                <span class="pm-view__label">自</span>
                                <span class="pm-view__value">系统</span>
                            </div>
                            <div class="pm-view__meta">
                                <span class="pm-view__label">日期</span>
                                <span class="pm-view__value">2026-06-22 22:32:11 </span>
                            </div>
                        </header>
                        <div class="pm-view__body">
                            你下载的种子'Wonder Wall S01E20 2026 1080p WEB-DL H265 AAC-ADWeb'被管理员删除。原因:已完结剧集,清理单集。
                        </div>
                    </article>
                </div>
            </td>
        </body>
    </html>
    """

    head, date, content = parser._parse_message_content(html_text)

    expected = (
        "种子被删除",
        "2026-06-22 22:32:11",
        "你下载的种子'Wonder Wall S01E20 2026 1080p WEB-DL H265 AAC-ADWeb'被管理员删除。原因:已完结剧集,清理单集。",
    )
    assert (head, date, content) == expected
|
||||
|
||||
|
||||
def test_audiences_unread_mailbox_only_uses_user_box():
|
||||
"""
|
||||
Audiences 只使用用户消息箱,首页不传 page,page=1 实际表示第二页。
|
||||
|
||||
Reference in New Issue
Block a user