mirror of
https://github.com/jxxghp/MoviePilot.git
synced 2026-02-02 18:22:39 +08:00
feat: 优化RSS订阅和网页抓取中发布日期(PubDate)的获取兼容性
- app/helper/rss.py: 优化RSS解析,支持带命名空间的日期标签(如 pubDate/published/updated)。
- app/modules/indexer/spider/__init__.py: 优化网页抓取,增加日期格式校验并对非标准格式进行自动归一化。
This commit is contained in:
@@ -382,7 +382,10 @@ class RssHelper:
|
||||
size = int(size_attr)
|
||||
|
||||
# 发布日期
|
||||
pubdate_nodes = item.xpath('.//pubDate | .//published | .//updated')
|
||||
pubdate_nodes = item.xpath('./pubDate | ./published | ./updated')
|
||||
if not pubdate_nodes:
|
||||
pubdate_nodes = item.xpath('.//*[local-name()="pubDate"] | .//*[local-name()="published"] | .//*[local-name()="updated"]')
|
||||
|
||||
pubdate = ""
|
||||
if pubdate_nodes and pubdate_nodes[0].text:
|
||||
pubdate = StringUtils.get_time(pubdate_nodes[0].text)
|
||||
|
||||
@@ -428,6 +428,12 @@ class SiteSpider:
|
||||
if pubdate_str:
|
||||
pubdate_str = pubdate_str.replace('\n', ' ').strip()
|
||||
self.torrents_info['pubdate'] = self.__filter_text(pubdate_str, selector.get('filters'))
|
||||
if self.torrents_info.get('pubdate'):
|
||||
try:
|
||||
if not isinstance(self.torrents_info['pubdate'], datetime.datetime):
|
||||
datetime.datetime.strptime(str(self.torrents_info['pubdate']), '%Y-%m-%d %H:%M:%S')
|
||||
except (ValueError, TypeError):
|
||||
self.torrents_info['pubdate'] = StringUtils.unify_datetime_str(str(self.torrents_info['pubdate']))
|
||||
|
||||
def __get_date_elapsed(self, torrent: Any):
|
||||
# torrent date elapsed text
|
||||
|
||||
Reference in New Issue
Block a user