diff --git a/app/chain/site.py b/app/chain/site.py index 0a0a6114..b157455c 100644 --- a/app/chain/site.py +++ b/app/chain/site.py @@ -205,7 +205,7 @@ class SiteChain(ChainBase): logger.error(f"获取站点页面失败:{url}") return favicon_url, None html = etree.HTML(html_text) - if html: + if StringUtils.is_valid_html_element(html): fav_link = html.xpath('//head/link[contains(@rel, "icon")]/@href') if fav_link: favicon_url = urljoin(url, fav_link[0]) diff --git a/app/helper/rss.py b/app/helper/rss.py index 2470c95b..b97462c3 100644 --- a/app/helper/rss.py +++ b/app/helper/rss.py @@ -363,7 +363,7 @@ class RssHelper: return "", f"获取RSS链接失败:无法连接 {url} " # 解析HTML html = etree.HTML(html_text) - if html: + if StringUtils.is_valid_html_element(html): rss_link = html.xpath(site_conf.get("xpath")) if rss_link: return str(rss_link[-1]), "" diff --git a/app/modules/indexer/parser/discuz.py b/app/modules/indexer/parser/discuz.py index 67098daa..7d28378b 100644 --- a/app/modules/indexer/parser/discuz.py +++ b/app/modules/indexer/parser/discuz.py @@ -34,7 +34,7 @@ class DiscuzUserInfo(SiteParserBase): :return: """ html = etree.HTML(html_text) - if not html: + if not StringUtils.is_valid_html_element(html): return None # 用户等级 @@ -77,7 +77,7 @@ class DiscuzUserInfo(SiteParserBase): :return: 下页地址 """ html = etree.HTML(html_text) - if not html: + if not StringUtils.is_valid_html_element(html): return None size_col = 3 diff --git a/app/modules/indexer/parser/file_list.py b/app/modules/indexer/parser/file_list.py index 4991a8b9..f3652409 100644 --- a/app/modules/indexer/parser/file_list.py +++ b/app/modules/indexer/parser/file_list.py @@ -82,7 +82,7 @@ class FileListSiteUserInfo(SiteParserBase): :return: 下页地址 """ html = etree.HTML(html_text) - if not html: + if not StringUtils.is_valid_html_element(html): return None size_col = 6 diff --git a/app/modules/indexer/parser/gazelle.py b/app/modules/indexer/parser/gazelle.py index 73616768..0acf01dd 100644 --- a/app/modules/indexer/parser/gazelle.py +++ 
b/app/modules/indexer/parser/gazelle.py @@ -65,7 +65,7 @@ class GazelleSiteUserInfo(SiteParserBase): :return: """ html = etree.HTML(html_text) - if not html: + if not StringUtils.is_valid_html_element(html): return None # 用户等级 @@ -95,7 +95,7 @@ class GazelleSiteUserInfo(SiteParserBase): :return: 下页地址 """ html = etree.HTML(html_text) - if not html: + if not StringUtils.is_valid_html_element(html): return None size_col = 3 diff --git a/app/modules/indexer/parser/ipt_project.py b/app/modules/indexer/parser/ipt_project.py index 1e4eb7a5..cb268002 100644 --- a/app/modules/indexer/parser/ipt_project.py +++ b/app/modules/indexer/parser/ipt_project.py @@ -39,7 +39,7 @@ class IptSiteUserInfo(SiteParserBase): def _parse_user_detail_info(self, html_text: str): html = etree.HTML(html_text) - if not html: + if not StringUtils.is_valid_html_element(html): return user_levels_text = html.xpath('//tr/th[text()="Class"]/following-sibling::td[1]/text()') @@ -53,7 +53,7 @@ class IptSiteUserInfo(SiteParserBase): def _parse_user_torrent_seeding_info(self, html_text: str, multi_page: bool = False) -> Optional[str]: html = etree.HTML(html_text) - if not html: + if not StringUtils.is_valid_html_element(html): return # seeding start seeding_end_pos = 3 diff --git a/app/modules/indexer/parser/nexus_hhanclub.py b/app/modules/indexer/parser/nexus_hhanclub.py index af3b57e0..4667933e 100644 --- a/app/modules/indexer/parser/nexus_hhanclub.py +++ b/app/modules/indexer/parser/nexus_hhanclub.py @@ -42,7 +42,7 @@ class NexusHhanclubSiteUserInfo(NexusPhpSiteUserInfo): super()._parse_user_detail_info(html_text) html = etree.HTML(html_text) - if not html: + if not StringUtils.is_valid_html_element(html): return # 加入时间 join_at_text = html.xpath('//*[@id="mainContent"]/div/div[2]/div[4]/div[3]/span[2]/text()[1]') diff --git a/app/modules/indexer/parser/nexus_php.py b/app/modules/indexer/parser/nexus_php.py index 33c8651f..d83733df 100644 --- a/app/modules/indexer/parser/nexus_php.py +++ 
b/app/modules/indexer/parser/nexus_php.py @@ -34,7 +34,7 @@ class NexusPhpSiteUserInfo(SiteParserBase): :return: """ html = etree.HTML(html_text) - if not html: + if not StringUtils.is_valid_html_element(html): return message_labels = html.xpath('//a[@href="messages.php"]/..') @@ -61,7 +61,7 @@ class NexusPhpSiteUserInfo(SiteParserBase): self._parse_message_unread(html_text) html = etree.HTML(html_text) - if not html: + if not StringUtils.is_valid_html_element(html): return ret = html.xpath(f'//a[contains(@href, "userdetails") and contains(@href, "{self.userid}")]//b//text()') @@ -128,7 +128,7 @@ class NexusPhpSiteUserInfo(SiteParserBase): :param html: :return: """ - if html: + if StringUtils.is_valid_html_element(html): gold, silver, copper = None, None, None golds = html.xpath('//span[@class = "ucoin-symbol ucoin-gold"]//text()') @@ -155,7 +155,7 @@ class NexusPhpSiteUserInfo(SiteParserBase): :return: 下页地址 """ html = etree.HTML(str(html_text).replace(r'\/', '/')) - if not html: + if not StringUtils.is_valid_html_element(html): return None # 首页存在扩展链接,使用扩展链接 @@ -223,7 +223,7 @@ class NexusPhpSiteUserInfo(SiteParserBase): :return: """ html = etree.HTML(html_text) - if not html: + if not StringUtils.is_valid_html_element(html): return self._get_user_level(html) @@ -340,7 +340,7 @@ class NexusPhpSiteUserInfo(SiteParserBase): def _parse_message_unread_links(self, html_text: str, msg_links: list) -> Optional[str]: html = etree.HTML(html_text) - if not html: + if not StringUtils.is_valid_html_element(html): return None message_links = html.xpath('//tr[not(./td/img[@alt="Read"])]/td/a[contains(@href, "viewmessage")]/@href') @@ -355,7 +355,7 @@ class NexusPhpSiteUserInfo(SiteParserBase): def _parse_message_content(self, html_text): html = etree.HTML(html_text) - if not html: + if not StringUtils.is_valid_html_element(html): return None, None, None # 标题 message_head_text = None diff --git a/app/modules/indexer/parser/small_horse.py 
b/app/modules/indexer/parser/small_horse.py index 31968144..1a095495 100644 --- a/app/modules/indexer/parser/small_horse.py +++ b/app/modules/indexer/parser/small_horse.py @@ -63,7 +63,7 @@ class SmallHorseSiteUserInfo(SiteParserBase): :return: 下页地址 """ html = etree.HTML(html_text) - if not html: + if not StringUtils.is_valid_html_element(html): return None size_col = 6 diff --git a/app/modules/indexer/parser/torrent_leech.py b/app/modules/indexer/parser/torrent_leech.py index 4bd44c1e..66448db2 100644 --- a/app/modules/indexer/parser/torrent_leech.py +++ b/app/modules/indexer/parser/torrent_leech.py @@ -67,7 +67,7 @@ class TorrentLeechSiteUserInfo(SiteParserBase): :return: 下页地址 """ html = etree.HTML(html_text) - if not html: + if not StringUtils.is_valid_html_element(html): return None size_col = 2 diff --git a/app/modules/indexer/parser/unit3d.py b/app/modules/indexer/parser/unit3d.py index 98e48e02..b2bd0e07 100644 --- a/app/modules/indexer/parser/unit3d.py +++ b/app/modules/indexer/parser/unit3d.py @@ -40,7 +40,7 @@ class Unit3dSiteUserInfo(SiteParserBase): :return: """ html = etree.HTML(html_text) - if not html: + if not StringUtils.is_valid_html_element(html): return None # 用户等级 @@ -64,7 +64,7 @@ class Unit3dSiteUserInfo(SiteParserBase): :return: 下页地址 """ html = etree.HTML(html_text) - if not html: + if not StringUtils.is_valid_html_element(html): return None size_col = 9 diff --git a/app/utils/site.py b/app/utils/site.py index b3a60740..0b19a50c 100644 --- a/app/utils/site.py +++ b/app/utils/site.py @@ -1,5 +1,7 @@ from lxml import etree +from app.utils.string import StringUtils + class SiteUtils: @@ -11,7 +13,7 @@ class SiteUtils: :return: """ html = etree.HTML(html_text) - if not html: + if not StringUtils.is_valid_html_element(html): return False # 存在明显的密码输入框,说明未登录 if html.xpath("//input[@type='password']"): @@ -39,7 +41,7 @@ class SiteUtils: :return True已签到 False未签到 """ html = etree.HTML(html_text) - if not html: + if not 
StringUtils.is_valid_html_element(html): return False # 站点签到支持的识别XPATH xpaths = [ diff --git a/app/utils/string.py b/app/utils/string.py index 7f9c6f33..3d1e34a3 100644 --- a/app/utils/string.py +++ b/app/utils/string.py @@ -795,3 +795,13 @@ class StringUtils: :return: 如果输入值不是 None,返回去除空白字符后的字符串,否则返回 None """ return value.strip() if value is not None else None + + @staticmethod + def is_valid_html_element(elem) -> bool: + """ + Check whether elem is a valid HTML element. The element must be non-None and have a non-zero length. + + :param elem: the HTML element to check + :return: True if elem is valid (not None and length greater than 0); otherwise False + """ + return elem is not None and len(elem) > 0