fix(lxml): Adjust HTML element checks to prevent FutureWarning

This commit is contained in:
InfinityPacer
2024-08-31 02:14:52 +08:00
parent 9ab852c1ad
commit 93801e857e
13 changed files with 35 additions and 23 deletions

View File

@@ -205,7 +205,7 @@ class SiteChain(ChainBase):
logger.error(f"获取站点页面失败:{url}")
return favicon_url, None
html = etree.HTML(html_text)
if html:
if StringUtils.is_valid_html_element(html):
fav_link = html.xpath('//head/link[contains(@rel, "icon")]/@href')
if fav_link:
favicon_url = urljoin(url, fav_link[0])

View File

@@ -363,7 +363,7 @@ class RssHelper:
return "", f"获取RSS链接失败无法连接 {url} "
# 解析HTML
html = etree.HTML(html_text)
if html:
if StringUtils.is_valid_html_element(html):
rss_link = html.xpath(site_conf.get("xpath"))
if rss_link:
return str(rss_link[-1]), ""

View File

@@ -34,7 +34,7 @@ class DiscuzUserInfo(SiteParserBase):
:return:
"""
html = etree.HTML(html_text)
if not html:
if not StringUtils.is_valid_html_element(html):
return None
# 用户等级
@@ -77,7 +77,7 @@ class DiscuzUserInfo(SiteParserBase):
:return: 下页地址
"""
html = etree.HTML(html_text)
if not html:
if not StringUtils.is_valid_html_element(html):
return None
size_col = 3

View File

@@ -82,7 +82,7 @@ class FileListSiteUserInfo(SiteParserBase):
:return: 下页地址
"""
html = etree.HTML(html_text)
if not html:
if not StringUtils.is_valid_html_element(html):
return None
size_col = 6

View File

@@ -65,7 +65,7 @@ class GazelleSiteUserInfo(SiteParserBase):
:return:
"""
html = etree.HTML(html_text)
if not html:
if not StringUtils.is_valid_html_element(html):
return None
# 用户等级
@@ -95,7 +95,7 @@ class GazelleSiteUserInfo(SiteParserBase):
:return: 下页地址
"""
html = etree.HTML(html_text)
if not html:
if not StringUtils.is_valid_html_element(html):
return None
size_col = 3

View File

@@ -39,7 +39,7 @@ class IptSiteUserInfo(SiteParserBase):
def _parse_user_detail_info(self, html_text: str):
html = etree.HTML(html_text)
if not html:
if not StringUtils.is_valid_html_element(html):
return
user_levels_text = html.xpath('//tr/th[text()="Class"]/following-sibling::td[1]/text()')
@@ -53,7 +53,7 @@ class IptSiteUserInfo(SiteParserBase):
def _parse_user_torrent_seeding_info(self, html_text: str, multi_page: bool = False) -> Optional[str]:
html = etree.HTML(html_text)
if not html:
if not StringUtils.is_valid_html_element(html):
return
# seeding start
seeding_end_pos = 3

View File

@@ -42,7 +42,7 @@ class NexusHhanclubSiteUserInfo(NexusPhpSiteUserInfo):
super()._parse_user_detail_info(html_text)
html = etree.HTML(html_text)
if not html:
if not StringUtils.is_valid_html_element(html):
return
# 加入时间
join_at_text = html.xpath('//*[@id="mainContent"]/div/div[2]/div[4]/div[3]/span[2]/text()[1]')

View File

@@ -34,7 +34,7 @@ class NexusPhpSiteUserInfo(SiteParserBase):
:return:
"""
html = etree.HTML(html_text)
if not html:
if not StringUtils.is_valid_html_element(html):
return
message_labels = html.xpath('//a[@href="messages.php"]/..')
@@ -61,7 +61,7 @@ class NexusPhpSiteUserInfo(SiteParserBase):
self._parse_message_unread(html_text)
html = etree.HTML(html_text)
if not html:
if not StringUtils.is_valid_html_element(html):
return
ret = html.xpath(f'//a[contains(@href, "userdetails") and contains(@href, "{self.userid}")]//b//text()')
@@ -128,7 +128,7 @@ class NexusPhpSiteUserInfo(SiteParserBase):
:param html:
:return:
"""
if html:
if StringUtils.is_valid_html_element(html):
gold, silver, copper = None, None, None
golds = html.xpath('//span[@class = "ucoin-symbol ucoin-gold"]//text()')
@@ -155,7 +155,7 @@ class NexusPhpSiteUserInfo(SiteParserBase):
:return: 下页地址
"""
html = etree.HTML(str(html_text).replace(r'\/', '/'))
if not html:
if not StringUtils.is_valid_html_element(html):
return None
# 首页存在扩展链接,使用扩展链接
@@ -223,7 +223,7 @@ class NexusPhpSiteUserInfo(SiteParserBase):
:return:
"""
html = etree.HTML(html_text)
if not html:
if not StringUtils.is_valid_html_element(html):
return
self._get_user_level(html)
@@ -340,7 +340,7 @@ class NexusPhpSiteUserInfo(SiteParserBase):
def _parse_message_unread_links(self, html_text: str, msg_links: list) -> Optional[str]:
html = etree.HTML(html_text)
if not html:
if not StringUtils.is_valid_html_element(html):
return None
message_links = html.xpath('//tr[not(./td/img[@alt="Read"])]/td/a[contains(@href, "viewmessage")]/@href')
@@ -355,7 +355,7 @@ class NexusPhpSiteUserInfo(SiteParserBase):
def _parse_message_content(self, html_text):
html = etree.HTML(html_text)
if not html:
if not StringUtils.is_valid_html_element(html):
return None, None, None
# 标题
message_head_text = None

View File

@@ -63,7 +63,7 @@ class SmallHorseSiteUserInfo(SiteParserBase):
:return: 下页地址
"""
html = etree.HTML(html_text)
if not html:
if not StringUtils.is_valid_html_element(html):
return None
size_col = 6

View File

@@ -67,7 +67,7 @@ class TorrentLeechSiteUserInfo(SiteParserBase):
:return: 下页地址
"""
html = etree.HTML(html_text)
if not html:
if not StringUtils.is_valid_html_element(html):
return None
size_col = 2

View File

@@ -40,7 +40,7 @@ class Unit3dSiteUserInfo(SiteParserBase):
:return:
"""
html = etree.HTML(html_text)
if not html:
if not StringUtils.is_valid_html_element(html):
return None
# 用户等级
@@ -64,7 +64,7 @@ class Unit3dSiteUserInfo(SiteParserBase):
:return: 下页地址
"""
html = etree.HTML(html_text)
if not html:
if not StringUtils.is_valid_html_element(html):
return None
size_col = 9

View File

@@ -1,5 +1,7 @@
from lxml import etree
from app.utils.string import StringUtils
class SiteUtils:
@@ -11,7 +13,7 @@ class SiteUtils:
:return:
"""
html = etree.HTML(html_text)
if not html:
if not StringUtils.is_valid_html_element(html):
return False
# 存在明显的密码输入框,说明未登录
if html.xpath("//input[@type='password']"):
@@ -39,7 +41,7 @@ class SiteUtils:
:return True已签到 False未签到
"""
html = etree.HTML(html_text)
if not html:
if not StringUtils.is_valid_html_element(html):
return False
# 站点签到支持的识别XPATH
xpaths = [

View File

@@ -795,3 +795,13 @@ class StringUtils:
:return: 如果输入值不是 None返回去除空白字符后的字符串否则返回 None
"""
return value.strip() if value is not None else None
@staticmethod
def is_valid_html_element(elem) -> bool:
    """
    Report whether elem is a usable HTML element.

    The element must be non-None and have a non-zero length. The length is
    compared explicitly instead of relying on the truth value of the element.
    :param elem: the HTML element to check
    :return: True if elem is not None and len(elem) is greater than 0, otherwise False
    """
    # Guard first: a missing element is never valid, and len(None) would raise.
    if elem is None:
        return False
    # A zero-length element is treated the same way as a missing one.
    has_content = len(elem) > 0
    return has_content