diff --git a/app/modules/indexer/parser/nexus_rabbit.py b/app/modules/indexer/parser/nexus_rabbit.py index f5a24fdc..90a2e80d 100644 --- a/app/modules/indexer/parser/nexus_rabbit.py +++ b/app/modules/indexer/parser/nexus_rabbit.py @@ -1,21 +1,60 @@ # -*- coding: utf-8 -*- +import re import json from typing import Optional - +from lxml import etree +from urllib.parse import urljoin from app.log import logger from app.modules.indexer.parser import SiteSchema -from app.modules.indexer.parser.nexus_php import NexusPhpSiteUserInfo +from app.modules.indexer.parser import SiteParserBase +from app.utils.string import StringUtils -class NexusRabbitSiteUserInfo(NexusPhpSiteUserInfo): +class NexusRabbitSiteUserInfo(SiteParserBase): schema = SiteSchema.NexusRabbit def _parse_site_page(self, html_text: str): - super()._parse_site_page(html_text) - self._torrent_seeding_page = f"getusertorrentlistajax.php?page=1&limit=5000000&type=seeding&uid={self.userid}" - self._torrent_seeding_headers = {"Accept": "application/json, text/javascript, */*; q=0.01"} + html_text = self._prepare_html_text(html_text) - def _parse_user_torrent_seeding_info(self, html_text: str, multi_page: bool = False) -> Optional[str]: + user_detail = re.search(r"user.php\?id=(\d+)", html_text) + + if not (user_detail and user_detail.group().strip()): + return + + self.userid = user_detail.group(1) + self._user_detail_page = f"user.php?id={self.userid}" + + self._user_traffic_page = None + + self._torrent_seeding_page = "api/general" + self._torrent_seeding_params = { + "page": 1, + "limit": 5000000, + "action": "userTorrentsList", + "data": {"type": "seeding", "id": int(self.userid)}, + } + self._torrent_seeding_headers = { + "Content-Type": "application/json", + "Accept": "application/json, text/plain, */*", + "X-Requested-With": "XMLHttpRequest", # 必须要加上这一条,不然返回的是空数据 + } + + self._user_mail_unread_page = None + self._sys_mail_unread_page = "api/general" + self._mail_unread_params = { + "page": 1, + "limit": 5000000, + "action": "getMessageIn", + } + self._mail_unread_headers = { + "Content-Type": "application/json", + "Accept": "application/json, text/plain, */*", + "X-Requested-With": "XMLHttpRequest", + } + + def _parse_user_torrent_seeding_info( + self, html_text: str, multi_page: bool = False + ) -> Optional[str]: """ 做种相关信息 :param html_text: @@ -24,22 +63,112 @@ class NexusRabbitSiteUserInfo(NexusPhpSiteUserInfo): """ try: - torrents = json.loads(html_text).get('data') + torrents = json.loads(html_text).get("data", []) except Exception as e: logger.error(f"解析做种信息失败: {str(e)}") return - page_seeding_size = 0 - page_seeding_info = [] + seeding_size = 0 + seeding_info = [] - page_seeding = len(torrents) for torrent in torrents: - seeders = int(torrent.get('seeders', 0)) - size = int(torrent.get('size', 0)) - page_seeding_size += int(torrent.get('size', 0)) + seeders = int(torrent.get("seeders", 0)) + size = StringUtils.num_filesize(torrent.get("size")) + seeding_size += size + seeding_info.append([seeders, size]) - page_seeding_info.append([seeders, size]) + self.seeding = len(torrents) + self.seeding_size = seeding_size + self.seeding_info = seeding_info - self.seeding += page_seeding - self.seeding_size += page_seeding_size - self.seeding_info.extend(page_seeding_info) + def _parse_message_unread_links( + self, html_text: str, msg_links: list + ) -> str | None: + unread_ids = [] + try: + messages = json.loads(html_text).get("data", []) + except Exception as e: + logger.error(f"解析未读消息失败: {e}") + return + for msg in messages: + msg_id, msg_unread = msg.get("id"), msg.get("unread") + if not (msg_id and msg_unread) or msg_unread == "no": + continue + unread_ids.append(msg_id) + head, date, content = msg.get("subject"), msg.get("added"), msg.get("msg") + if head and date and content: + self.message_unread_contents.append((head, date, content)) + self.message_unread = len(unread_ids) + if unread_ids: + self._get_page_content( + url=urljoin(self._base_url, "api/general?loading=true"), + params={"action": "readMessage", "data": {"ids": unread_ids}}, + headers={ + "Content-Type": "application/json", + "Accept": "application/json, text/plain, */*", + "X-Requested-With": "XMLHttpRequest", + }, + ) + return None + + def _parse_user_base_info(self, html_text: str): + """只有奶糖余额才需要在 base 中获取,其它均可以在详情页拿到""" + html = etree.HTML(html_text) + if not StringUtils.is_valid_html_element(html): + return + bonus = html.xpath( + '//div[contains(text(), "奶糖余额")]/following-sibling::div[1]/text()' + ) + if bonus: + self.bonus = StringUtils.str_float(bonus[0].strip()) + + def _parse_user_detail_info(self, html_text: str): + html = etree.HTML(html_text) + if not StringUtils.is_valid_html_element(html): + return + # 缩小一下查找范围,所有的信息都在这个 div 里 + user_info = html.xpath('//div[contains(@class, "layui-hares-user-info-right")]') + if not user_info: + return + user_info = user_info[0] + # 用户名 + if username := user_info.xpath( + './/span[contains(text(), "用户名")]/a/span/text()' + ): + self.username = username[0].strip() + # 等级 + if user_level := user_info.xpath('.//span[contains(text(), "等级")]/b/text()'): + self.user_level = user_level[0].strip() + # 加入日期 + if join_date := user_info.xpath('.//span[contains(text(), "注册日期")]/text()'): + join_date = join_date[0].strip().split("\r")[0].removeprefix("注册日期:") + self.join_at = StringUtils.unify_datetime_str(join_date) + # 上传量 + if upload := user_info.xpath('.//span[contains(text(), "上传量")]/text()'): + self.upload = StringUtils.num_filesize( + upload[0].strip().removeprefix("上传量:") + ) + # 下载量 + if download := user_info.xpath('.//span[contains(text(), "下载量")]/text()'): + self.download = StringUtils.num_filesize( + download[0].strip().removeprefix("下载量:") + ) + # 分享率 + if ratio := user_info.xpath('.//span[contains(text(), "分享率")]/em/text()'): + self.ratio = StringUtils.str_float(ratio[0].strip()) + + def _parse_message_content(self, html_text): + """ + 解析短消息内容,已经在 _parse_message_unread_links 内实现,重载防止 abstractmethod 报错 + :param html_text: + :return: head: message, date: time, content: message content + """ + pass + + def _parse_user_traffic_info(self, html_text: str): + """ + 解析用户的上传,下载,分享率等信息,已经在 _parse_user_detail_info 内实现,重载防止 abstractmethod 报错 + :param html_text: + :return: + """ + pass diff --git a/app/utils/site.py b/app/utils/site.py index 0b19a50c..f1831683 100644 --- a/app/utils/site.py +++ b/app/utils/site.py @@ -24,7 +24,8 @@ class SiteUtils: ' or contains(@data-url, "logout")' ' or contains(@href, "mybonus") ' ' or contains(@onclick, "logout")' - ' or contains(@href, "usercp")]', + ' or contains(@href, "usercp")' + ' or contains(@lay-on, "logout")]', '//form[contains(@action, "logout")]', '//div[@class="user-info-side"]', '//a[@id="myitem"]'