feat: 优化RSS订阅和网页抓取中发布日期(PubDate)的获取兼容性

- app/helper/rss.py: 优化RSS解析,支持带命名空间的日期标签(如 pubDate/published/updated)。
- app/modules/indexer/spider/__init__.py: 优化网页抓取,增加日期格式校验并对非标准格式进行自动归一化。
This commit is contained in:
CHANTXU64
2026-02-02 16:52:04 +08:00
parent 4e74d32882
commit 8323834483
2 changed files with 10 additions and 1 deletions

View File

@@ -382,7 +382,10 @@ class RssHelper:
size = int(size_attr)
# 发布日期
pubdate_nodes = item.xpath('.//pubDate | .//published | .//updated')
pubdate_nodes = item.xpath('./pubDate | ./published | ./updated')
if not pubdate_nodes:
pubdate_nodes = item.xpath('.//*[local-name()="pubDate"] | .//*[local-name()="published"] | .//*[local-name()="updated"]')
pubdate = ""
if pubdate_nodes and pubdate_nodes[0].text:
pubdate = StringUtils.get_time(pubdate_nodes[0].text)

View File

@@ -428,6 +428,12 @@ class SiteSpider:
if pubdate_str:
pubdate_str = pubdate_str.replace('\n', ' ').strip()
self.torrents_info['pubdate'] = self.__filter_text(pubdate_str, selector.get('filters'))
if self.torrents_info.get('pubdate'):
try:
if not isinstance(self.torrents_info['pubdate'], datetime.datetime):
datetime.datetime.strptime(str(self.torrents_info['pubdate']), '%Y-%m-%d %H:%M:%S')
except (ValueError, TypeError):
self.torrents_info['pubdate'] = StringUtils.unify_datetime_str(str(self.torrents_info['pubdate']))
def __get_date_elapsed(self, torrent: Any):
# torrent date elapsed text