From 328ab56a218054f116d197dba22749f04e707776 Mon Sep 17 00:00:00 2001 From: EstrellaXD Date: Tue, 9 May 2023 09:49:13 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E6=95=B0=E6=8D=AE=E5=BA=93?= =?UTF-8?q?=E6=93=8D=E4=BD=9C,=E5=AE=8C=E5=96=84RSS=E8=A7=A3=E6=9E=90?= =?UTF-8?q?=E5=99=A8=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/module/core/download_client.py | 4 +- src/module/database/connector.py | 4 +- src/module/database/operator.py | 52 +++++++++++++--------- src/module/models/bangumi.py | 3 +- src/module/network/request_contents.py | 23 +++++----- src/module/parser/title_parser.py | 9 +--- src/module/rss/rss_analyser.py | 61 +++++++++++++++++--------- 7 files changed, 95 insertions(+), 61 deletions(-) diff --git a/src/module/core/download_client.py b/src/module/core/download_client.py index 91856d5e..9e25c87d 100644 --- a/src/module/core/download_client.py +++ b/src/module/core/download_client.py @@ -45,7 +45,7 @@ class DownloadClient: prefs = self.client.get_app_prefs() self.download_path = os.path.join(prefs["save_path"], "Bangumi") - def set_rule(self, info: BangumiData, rss_link): + def set_rule(self, info: BangumiData): official_name, raw_name, season, group = ( info.official_title, info.title_raw, @@ -60,7 +60,7 @@ class DownloadClient: "episodeFilter": "", "smartFilter": False, "previouslyMatchedEpisodes": [], - "affectedFeeds": [rss_link], + "affectedFeeds": info.rss_link, "ignoreDays": 0, "lastMatch": "", "addPaused": False, diff --git a/src/module/database/connector.py b/src/module/database/connector.py index 135071a3..9a976214 100644 --- a/src/module/database/connector.py +++ b/src/module/database/connector.py @@ -27,7 +27,9 @@ class DataConnector: eps_collect INTEGER NOT NULL, offset INTEGER NOT NULL, filter TEXT NOT NULL, - rss_link TEXT NOT NULL + rss_link TEXT NOT NULL, + poster_link TEXT, + added INTEGER NOT NULL ); """ ) diff --git a/src/module/database/operator.py 
b/src/module/database/operator.py
index ca4039e0..d14d181b 100644
--- a/src/module/database/operator.py
+++ b/src/module/database/operator.py
@@ -21,7 +21,7 @@ class DataOperator(DataConnector):
     def db_to_data(db_data: dict) -> BangumiData:
         for key, item in db_data.items():
             if isinstance(item, int):
-                if key not in ["id", "offset", "season"]:
+                if key not in ["id", "offset", "season", "year"]:
                     db_data[key] = bool(item)
             elif key in ["filter", "rss_link"]:
                 db_data[key] = item.split(",")
@@ -45,7 +45,9 @@ class DataOperator(DataConnector):
                 eps_collect,
                 offset,
                 filter,
-                rss_link
+                rss_link,
+                poster_link,
+                added
             ) VALUES (
                 :id,
                 :official_title,
@@ -60,7 +62,9 @@ class DataOperator(DataConnector):
                 :eps_collect,
                 :offset,
                 :filter,
-                :rss_link
+                :rss_link,
+                :poster_link,
+                :added
             )
             """,
             db_data,
@@ -86,7 +90,9 @@ class DataOperator(DataConnector):
                 eps_collect,
                 offset,
                 filter,
-                rss_link
+                rss_link,
+                poster_link,
+                added
             ) VALUES (
                 :id,
                 :official_title,
@@ -101,7 +107,9 @@ class DataOperator(DataConnector):
                 :eps_collect,
                 :offset,
                 :filter,
-                :rss_link
+                :rss_link,
+                :poster_link,
+                :added
             )
             """,
             db_data,
@@ -125,7 +133,9 @@ class DataOperator(DataConnector):
                 eps_collect = :eps_collect,
                 offset = :offset,
                 filter = :filter,
-                rss_link = :rss_link
+                rss_link = :rss_link,
+                poster_link = :poster_link,
+                added = :added
             WHERE id = :id
             """,
             db_data,
@@ -133,18 +143,16 @@ class DataOperator(DataConnector):
         self._conn.commit()
         return self._cursor.rowcount == 1

-    def update_rss(self, title_raw, rss_set: list[str]):
+    def update_rss(self, title_raw, rss_set: str):
+        # Update rss and select all data
         self._cursor.execute(
             """
-            UPDATE bangumi SET
-            rss_link = :rss_link
-            WHERE title_raw = :title_raw
+            UPDATE bangumi SET rss_link = :rss_link WHERE title_raw = :title_raw
             """,
-            {"rss_link": ",".join(rss_set), "title_raw": title_raw},
+            {"rss_link": rss_set, "title_raw": title_raw},
         )
         self._conn.commit()
logger.info(f"Update {title_raw} rss_link to {rss_set}.") - return self._cursor.rowcount == 1 def search_id(self, _id: int) -> BangumiData | None: self._cursor.execute( @@ -194,7 +204,7 @@ class DataOperator(DataConnector): ) return self._cursor.fetchone() is not None - def match_list(self, title_dict: dict) -> dict: + def match_list(self, title_dict: dict, rss_link: str) -> dict: # Match title_raw in database self._cursor.execute( """ @@ -203,15 +213,17 @@ class DataOperator(DataConnector): ) data = self._cursor.fetchall() if not data: - return {} + return title_dict # Match title - for title, rss_link in title_dict.items(): + for title in title_dict.copy().keys(): for title_raw, rss_set in data: - if rss_link in rss_set and title_raw in title: - del title_dict[title] - elif rss_link not in rss_set and title_raw in title: - # TODO: Logic problem - break + if title_raw in title: + if rss_link in rss_set: + title_dict.pop(title) + else: + rss_set += "," + rss_link + self.update_rss(title_raw, rss_set) + break return title_dict def not_exist_titles(self, titles: list[str], rss_link) -> list[str]: diff --git a/src/module/models/bangumi.py b/src/module/models/bangumi.py index 536ff734..6d8e9062 100644 --- a/src/module/models/bangumi.py +++ b/src/module/models/bangumi.py @@ -17,7 +17,8 @@ class BangumiData(BaseModel): offset: int = Field(0, alias="offset", title="番剧偏移量") filter: list[str] = Field(..., alias="filter", title="番剧过滤器") rss_link: list[str] = Field(..., alias="rss_link", title="番剧RSS链接") - # poster_link: str | None = Field(None, alias="poster_link", title="番剧海报链接") + poster_link: str | None = Field(None, alias="poster_link", title="番剧海报链接") + added: bool = Field(False, alias="added", title="是否已添加") class Notification(BaseModel): diff --git a/src/module/network/request_contents.py b/src/module/network/request_contents.py index 5dd3b10d..d63eae4f 100644 --- a/src/module/network/request_contents.py +++ b/src/module/network/request_contents.py @@ -13,6 +13,7 @@ 
 FILTER = "|".join(settings.rss_parser.filter)


 class TorrentInfo:
     name: str
     torrent_link: str
+    homepage: str = None


 class RequestContent(RequestURL):
@@ -29,22 +30,24 @@ class RequestContent(RequestURL):
             torrent_homepage.append(item.find("link").text)

         torrents = []
-        for _title, torrent_url in zip(torrent_titles, torrent_urls):
+        for _title, torrent_url, homepage in zip(torrent_titles, torrent_urls, torrent_homepage):
             if _filter:
                 if re.search(FILTER, _title) is None:
-                    torrents.append(TorrentInfo(_title, torrent_url))
+                    torrents.append(TorrentInfo(_title, torrent_url, homepage))
             else:
-                torrents.append(TorrentInfo(_title, torrent_url))
+                torrents.append(TorrentInfo(_title, torrent_url, homepage))
         return torrents

-    def get_poster(self, _url):
-        content = self.get_html(_url).text
+    def get_mikan_info(self, _url) -> tuple[str, str]:
+        content = self.get_html(_url).text
         soup = BeautifulSoup(content, "html.parser")
-        div = soup.find("div", {"class": "bangumi-poster"})
-        style = div.get("style")
-        if style:
-            return style.split("url('")[1].split("')")[0]
-        return None
+        poster_div = soup.find("div", {"class": "bangumi-poster"})
+        poster_style = poster_div.get("style")
+        official_title = soup.select_one('p.bangumi-title a[href^="/Home/Bangumi/"]').text
+        if poster_style:
+            poster_path = poster_style.split("url('")[1].split("')")[0]
+            return poster_path, official_title
+        return "", official_title

     def get_xml(self, _url) -> xml.etree.ElementTree.Element:
         return xml.etree.ElementTree.fromstring(self.get_url(_url).text)
diff --git a/src/module/parser/title_parser.py b/src/module/parser/title_parser.py
index 2cd12609..39c692d3 100644
--- a/src/module/parser/title_parser.py
+++ b/src/module/parser/title_parser.py
@@ -47,13 +47,8 @@ class TitleParser:
         }
         title_search = episode.title_zh if episode.title_zh else episode.title_en
         title_raw = episode.title_en if episode.title_en else episode.title_zh
-        if settings.rss_parser.enable_tmdb:
-            official_title, _season = self.tmdb_parser(
-                title_search,
episode.season, language
-            )
-        else:
-            official_title = titles[language] if titles[language] else titles["zh"]
-            _season = episode.season
+        official_title = titles[language] if titles[language] else titles["zh"]
+        _season = episode.season
         data = BangumiData(
             id=_id,
             official_title=official_title,
diff --git a/src/module/rss/rss_analyser.py b/src/module/rss/rss_analyser.py
index 65942032..52cb0eeb 100644
--- a/src/module/rss/rss_analyser.py
+++ b/src/module/rss/rss_analyser.py
@@ -2,39 +2,55 @@
 import logging

 from module.network import RequestContent
 from module.parser import TitleParser
-from module.models import Config
+from module.models import Config, BangumiData
 from module.database import DataOperator
 from module.core import DownloadClient

 logger = logging.getLogger(__name__)


-class RSSAnalyser(DownloadClient):
+class RSSAnalyser:
     def __init__(self, settings: Config):
-        super().__init__(settings)
         self._title_analyser = TitleParser()
         self.settings = settings

-    def rss_to_datas(self, rss_link: str):
+    def rss_to_datas(self, rss_link: str) -> list[BangumiData]:
         with RequestContent() as req:
             rss_torrents = req.get_torrents(rss_link)
-        title_dict = {torrent.name: rss_link for torrent in rss_torrents}
+        title_dict = {torrent.name: torrent.homepage for torrent in rss_torrents}
         with DataOperator() as op:
-            update_dict = op.match_list(title_dict)
-            if not update_dict:
+            new_dict = op.match_list(title_dict, rss_link)
+            if not new_dict:
                 logger.debug("No new title found.")
-                return
+                return []
             _id = op.gen_id()
-            for raw_title in add_title_list:
-                data = self._title_analyser.raw_parser(
-                    raw=raw_title, _id=_id, settings=self.settings, rss_link=rss_link
-                )
-                if data is not None:
-                    op.insert(data)
-                    self.set_rule(data, rss_link)
-                    _id += 1
+            new_data = []
+            # New List
+            with RequestContent() as req:
+                for raw_title, homepage in new_dict.items():
+                    data = self._title_analyser.raw_parser(
+                        raw=raw_title, settings=self.settings, rss_link=rss_link, _id=_id
+                    )
+                    if data is not None:
+                        poster_link, official_title = req.get_mikan_info(homepage)
+                        data.poster_link = poster_link
+                        # Official title type
+                        if self.settings.rss_parser.parser_type == "mikan":
+                            data.official_title = official_title
+                        elif self.settings.rss_parser.parser_type == "tmdb":
+                            official_title, year, season = self._title_analyser.tmdb_parser(data.official_title, data.season, self.settings.rss_parser.language)
+                            data.official_title = official_title
+                            data.year = year
+                            data.season = season
+                        else:
+                            pass
+                        new_data.append(data)
+                        _id += 1
+                        logger.debug(f"New title found: {data.official_title}")
+            op.insert_list(new_data)
+            return new_data

-    def rss_to_data(self, url, _filter: bool = True):
+    def rss_to_data(self, url, _filter: bool = True) -> BangumiData:
         with RequestContent() as req:
             rss_torrents = req.get_torrents(url, _filter)
             for torrent in rss_torrents:
@@ -42,7 +59,12 @@
                 data = self._title_analyser.raw_parser(
                     torrent.name, settings=self.settings, rss_link=url
                 )
-                self.set_rule(data, url)
+                if data is not None:
+                    with DataOperator() as op:
+                        _id = op.gen_id()
+                        data.id = _id
+                        op.insert(data)
+                return data
             except Exception as e:
                 logger.debug(e)
@@ -58,5 +80,4 @@
 if __name__ == '__main__':
     from module.conf import settings, setup_logger
     setup_logger(settings)
     link = "https://mikanani.me/RSS/MyBangumi?token=Td8ceWZZv3s2OZm5ji9RoMer8vk5VS3xzC1Hmg8A26E%3d"
-    with RSSAnalyser(settings) as analyser:
-        analyser.rss_to_datas(link)
+    data = RSSAnalyser(settings).rss_to_datas(link)