diff --git a/app/modules/indexer/spider/__init__.py b/app/modules/indexer/spider/__init__.py index 5f67d9c6..b5d5d8f8 100644 --- a/app/modules/indexer/spider/__init__.py +++ b/app/modules/indexer/spider/__init__.py @@ -64,6 +64,8 @@ class TorrentSpider: torrents_info_array: list = [] # 搜索超时, 默认: 15秒 _timeout = 15 + # 站点解析时是否需要编码 + encoding: bool = False def __init__(self, indexer: CommentedMap, @@ -95,6 +97,7 @@ class TorrentSpider: self.domain = indexer.get('domain') self.result_num = int(indexer.get('result_num') or 100) self._timeout = int(indexer.get('timeout') or 15) + self.encoding = indexer.get('encoding', False) self.page = page if self.domain and not str(self.domain).endswith("/"): self.domain = self.domain + "/" @@ -728,7 +731,10 @@ class TorrentSpider: self.torrents_info_array = [] try: # 解析站点文本对象 - html_doc = PyQuery(html_text) + if self.encoding: + html_doc = PyQuery(html_text.encode('utf-8')) + else: + html_doc = PyQuery(html_text) # 种子筛选器 torrents_selector = self.list.get('selector', '') # 遍历种子html列表