From 832383448376b2fc82cb3bfd21d8b44630c1cedf Mon Sep 17 00:00:00 2001 From: CHANTXU64 Date: Mon, 2 Feb 2026 16:52:04 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E4=BC=98=E5=8C=96RSS=E8=AE=A2=E9=98=85?= =?UTF-8?q?=E5=92=8C=E7=BD=91=E9=A1=B5=E6=8A=93=E5=8F=96=E4=B8=AD=E5=8F=91?= =?UTF-8?q?=E5=B8=83=E6=97=A5=E6=9C=9F(PubDate)=E7=9A=84=E8=8E=B7=E5=8F=96?= =?UTF-8?q?=E5=85=BC=E5=AE=B9=E6=80=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - app/helper/rss.py: 优化RSS解析,支持带命名空间的日期标签(如 pubDate/published/updated)。 - app/modules/indexer/spider/__init__.py: 优化网页抓取,增加日期格式校验并对非标准格式进行自动归一化。 --- app/helper/rss.py | 5 ++++- app/modules/indexer/spider/__init__.py | 6 ++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/app/helper/rss.py b/app/helper/rss.py index 5cd309d8..5257ff0c 100644 --- a/app/helper/rss.py +++ b/app/helper/rss.py @@ -382,7 +382,10 @@ class RssHelper: size = int(size_attr) # 发布日期 - pubdate_nodes = item.xpath('.//pubDate | .//published | .//updated') + pubdate_nodes = item.xpath('./pubDate | ./published | ./updated') + if not pubdate_nodes: + pubdate_nodes = item.xpath('.//*[local-name()="pubDate"] | .//*[local-name()="published"] | .//*[local-name()="updated"]') + pubdate = "" if pubdate_nodes and pubdate_nodes[0].text: pubdate = StringUtils.get_time(pubdate_nodes[0].text) diff --git a/app/modules/indexer/spider/__init__.py b/app/modules/indexer/spider/__init__.py index 1ced2441..ef2bba9d 100644 --- a/app/modules/indexer/spider/__init__.py +++ b/app/modules/indexer/spider/__init__.py @@ -428,6 +428,12 @@ class SiteSpider: if pubdate_str: pubdate_str = pubdate_str.replace('\n', ' ').strip() self.torrents_info['pubdate'] = self.__filter_text(pubdate_str, selector.get('filters')) + if self.torrents_info.get('pubdate'): + try: + if not isinstance(self.torrents_info['pubdate'], datetime.datetime): + datetime.datetime.strptime(str(self.torrents_info['pubdate']), '%Y-%m-%d %H:%M:%S') + except (ValueError, TypeError): + self.torrents_info['pubdate'] = StringUtils.unify_datetime_str(str(self.torrents_info['pubdate'])) def __get_date_elapsed(self, torrent: Any): # torrent date elapsed text