From 328ab56a218054f116d197dba22749f04e707776 Mon Sep 17 00:00:00 2001 From: EstrellaXD Date: Tue, 9 May 2023 09:49:13 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E6=95=B0=E6=8D=AE=E5=BA=93?= =?UTF-8?q?=E6=93=8D=E4=BD=9C,=E5=AE=8C=E5=96=84RSS=E8=A7=A3=E6=9E=90?= =?UTF-8?q?=E5=99=A8=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/module/core/download_client.py | 4 +- src/module/database/connector.py | 4 +- src/module/database/operator.py | 52 +++++++++++++--------- src/module/models/bangumi.py | 3 +- src/module/network/request_contents.py | 23 +++++----- src/module/parser/title_parser.py | 9 +--- src/module/rss/rss_analyser.py | 61 +++++++++++++++++--------- 7 files changed, 95 insertions(+), 61 deletions(-) diff --git a/src/module/core/download_client.py b/src/module/core/download_client.py index 91856d5e..9e25c87d 100644 --- a/src/module/core/download_client.py +++ b/src/module/core/download_client.py @@ -45,7 +45,7 @@ class DownloadClient: prefs = self.client.get_app_prefs() self.download_path = os.path.join(prefs["save_path"], "Bangumi") - def set_rule(self, info: BangumiData, rss_link): + def set_rule(self, info: BangumiData): official_name, raw_name, season, group = ( info.official_title, info.title_raw, @@ -60,7 +60,7 @@ class DownloadClient: "episodeFilter": "", "smartFilter": False, "previouslyMatchedEpisodes": [], - "affectedFeeds": [rss_link], + "affectedFeeds": info.rss_link, "ignoreDays": 0, "lastMatch": "", "addPaused": False, diff --git a/src/module/database/connector.py b/src/module/database/connector.py index 135071a3..9a976214 100644 --- a/src/module/database/connector.py +++ b/src/module/database/connector.py @@ -27,7 +27,9 @@ class DataConnector: eps_collect INTEGER NOT NULL, offset INTEGER NOT NULL, filter TEXT NOT NULL, - rss_link TEXT NOT NULL + rss_link TEXT NOT NULL, + poster_link TEXT, + added INTEGER NOT NULL ); """ ) diff --git a/src/module/database/operator.py 
b/src/module/database/operator.py
index ca4039e0..d14d181b 100644
--- a/src/module/database/operator.py
+++ b/src/module/database/operator.py
@@ -21,7 +21,7 @@ class DataOperator(DataConnector):
     def db_to_data(db_data: dict) -> BangumiData:
         for key, item in db_data.items():
             if isinstance(item, int):
-                if key not in ["id", "offset", "season"]:
+                if key not in ["id", "offset", "season", "year"]:
                     db_data[key] = bool(item)
             elif key in ["filter", "rss_link"]:
                 db_data[key] = item.split(",")
@@ -45,7 +45,9 @@ class DataOperator(DataConnector):
                 eps_collect,
                 offset,
                 filter,
-                rss_link
+                rss_link,
+                poster_link,
+                added
             ) VALUES (
                 :id,
                 :official_title,
@@ -60,7 +62,9 @@ class DataOperator(DataConnector):
                 :eps_collect,
                 :offset,
                 :filter,
-                :rss_link
+                :rss_link,
+                :poster_link,
+                :added
             )
             """,
             db_data,
@@ -86,7 +90,9 @@ class DataOperator(DataConnector):
                 eps_collect,
                 offset,
                 filter,
-                rss_link
+                rss_link,
+                poster_link,
+                added
             ) VALUES (
                 :id,
                 :official_title,
@@ -101,7 +107,9 @@ class DataOperator(DataConnector):
                 :eps_collect,
                 :offset,
                 :filter,
-                :rss_link
+                :rss_link,
+                :poster_link,
+                :added
             )
             """,
             db_data,
@@ -125,7 +133,9 @@ class DataOperator(DataConnector):
                 eps_collect = :eps_collect,
                 offset = :offset,
                 filter = :filter,
-                rss_link = :rss_link
+                rss_link = :rss_link,
+                poster_link = :poster_link,
+                added = :added
             WHERE id = :id
             """,
             db_data,
@@ -133,18 +143,16 @@ class DataOperator(DataConnector):
         self._conn.commit()
         return self._cursor.rowcount == 1

-    def update_rss(self, title_raw, rss_set: list[str]):
+    def update_rss(self, title_raw, rss_set: str):
+        # Update rss and select all data
         self._cursor.execute(
             """
-            UPDATE bangumi SET
-            rss_link = :rss_link
-            WHERE title_raw = :title_raw
+            UPDATE bangumi SET rss_link = :rss_link WHERE title_raw = :title_raw
             """,
-            {"rss_link": ",".join(rss_set), "title_raw": title_raw},
+            {"rss_link": rss_set, "title_raw": title_raw},
         )
         self._conn.commit()
logger.info(f"Update {title_raw} rss_link to {rss_set}.") - return self._cursor.rowcount == 1 def search_id(self, _id: int) -> BangumiData | None: self._cursor.execute( @@ -194,7 +204,7 @@ class DataOperator(DataConnector): ) return self._cursor.fetchone() is not None - def match_list(self, title_dict: dict) -> dict: + def match_list(self, title_dict: dict, rss_link: str) -> dict: # Match title_raw in database self._cursor.execute( """ @@ -203,15 +213,17 @@ class DataOperator(DataConnector): ) data = self._cursor.fetchall() if not data: - return {} + return title_dict # Match title - for title, rss_link in title_dict.items(): + for title in title_dict.copy().keys(): for title_raw, rss_set in data: - if rss_link in rss_set and title_raw in title: - del title_dict[title] - elif rss_link not in rss_set and title_raw in title: - # TODO: Logic problem - break + if title_raw in title: + if rss_link in rss_set: + title_dict.pop(title) + else: + rss_set += "," + rss_link + self.update_rss(title_raw, rss_set) + break return title_dict def not_exist_titles(self, titles: list[str], rss_link) -> list[str]: diff --git a/src/module/models/bangumi.py b/src/module/models/bangumi.py index 536ff734..6d8e9062 100644 --- a/src/module/models/bangumi.py +++ b/src/module/models/bangumi.py @@ -17,7 +17,8 @@ class BangumiData(BaseModel): offset: int = Field(0, alias="offset", title="番剧偏移量") filter: list[str] = Field(..., alias="filter", title="番剧过滤器") rss_link: list[str] = Field(..., alias="rss_link", title="番剧RSS链接") - # poster_link: str | None = Field(None, alias="poster_link", title="番剧海报链接") + poster_link: str | None = Field(None, alias="poster_link", title="番剧海报链接") + added: bool = Field(False, alias="added", title="是否已添加") class Notification(BaseModel): diff --git a/src/module/network/request_contents.py b/src/module/network/request_contents.py index 5dd3b10d..d63eae4f 100644 --- a/src/module/network/request_contents.py +++ b/src/module/network/request_contents.py @@ -13,6 +13,7 @@ 
 FILTER = "|".join(settings.rss_parser.filter)


 class TorrentInfo:
     name: str
     torrent_link: str
+    homepage: str = None


 class RequestContent(RequestURL):
@@ -29,22 +30,24 @@ class RequestContent(RequestURL):
             torrent_homepage.append(item.find("link").text)

         torrents = []
-        for _title, torrent_url in zip(torrent_titles, torrent_urls):
+        for _title, torrent_url, homepage in zip(torrent_titles, torrent_urls, torrent_homepage):
             if _filter:
                 if re.search(FILTER, _title) is None:
-                    torrents.append(TorrentInfo(_title, torrent_url))
+                    torrents.append(TorrentInfo(_title, torrent_url, homepage))
             else:
-                torrents.append(TorrentInfo(_title, torrent_url))
+                torrents.append(TorrentInfo(_title, torrent_url, homepage))
         return torrents

-    def get_poster(self, _url):
-        content = self.get_html(_url).text
+    def get_mikan_info(self, _url) -> tuple[str, str]:
+        content = self.get_html(_url).text
         soup = BeautifulSoup(content, "html.parser")
-        div = soup.find("div", {"class": "bangumi-poster"})
-        style = div.get("style")
-        if style:
-            return style.split("url('")[1].split("')")[0]
-        return None
+        poster_div = soup.find("div", {"class": "bangumi-poster"})
+        poster_style = poster_div.get("style")
+        official_title = soup.select_one('p.bangumi-title a[href^="/Home/Bangumi/"]').text
+        if poster_style:
+            poster_path = poster_style.split("url('")[1].split("')")[0]
+            return poster_path, official_title
+        return "", official_title

     def get_xml(self, _url) -> xml.etree.ElementTree.Element:
         return xml.etree.ElementTree.fromstring(self.get_url(_url).text)
diff --git a/src/module/parser/title_parser.py b/src/module/parser/title_parser.py
index 2cd12609..39c692d3 100644
--- a/src/module/parser/title_parser.py
+++ b/src/module/parser/title_parser.py
@@ -47,13 +47,8 @@ class TitleParser:
         }
         title_search = episode.title_zh if episode.title_zh else episode.title_en
         title_raw = episode.title_en if episode.title_en else episode.title_zh
-        if settings.rss_parser.enable_tmdb:
-            official_title, _season = self.tmdb_parser(
-                title_search,
episode.season, language
-            )
-        else:
-            official_title = titles[language] if titles[language] else titles["zh"]
-            _season = episode.season
+        official_title = titles[language] if titles[language] else titles["zh"]
+        _season = episode.season
         data = BangumiData(
             id=_id,
             official_title=official_title,
diff --git a/src/module/rss/rss_analyser.py b/src/module/rss/rss_analyser.py
index 65942032..52cb0eeb 100644
--- a/src/module/rss/rss_analyser.py
+++ b/src/module/rss/rss_analyser.py
@@ -2,39 +2,55 @@
 import logging

 from module.network import RequestContent
 from module.parser import TitleParser
-from module.models import Config
+from module.models import Config, BangumiData
 from module.database import DataOperator
 from module.core import DownloadClient

 logger = logging.getLogger(__name__)


-class RSSAnalyser(DownloadClient):
+class RSSAnalyser:
     def __init__(self, settings: Config):
-        super().__init__(settings)
         self._title_analyser = TitleParser()
         self.settings = settings

-    def rss_to_datas(self, rss_link: str):
+    def rss_to_datas(self, rss_link: str) -> list[BangumiData]:
         with RequestContent() as req:
             rss_torrents = req.get_torrents(rss_link)
-        title_dict = {torrent.name: rss_link for torrent in rss_torrents}
+        title_dict = {torrent.name: torrent.homepage for torrent in rss_torrents}
         with DataOperator() as op:
-            update_dict = op.match_list(title_dict)
-            if not update_dict:
+            new_dict = op.match_list(title_dict, rss_link)
+            if not new_dict:
                 logger.debug("No new title found.")
-                return
+                return []
             _id = op.gen_id()
-            for raw_title in add_title_list:
-                data = self._title_analyser.raw_parser(
-                    raw=raw_title, _id=_id, settings=self.settings, rss_link=rss_link
-                )
-                if data is not None:
-                    op.insert(data)
-                    self.set_rule(data, rss_link)
-                    _id += 1
+            new_data = []
+            # New List
+            with RequestContent() as req:
+                for raw_title, homepage in new_dict.items():
+                    data = self._title_analyser.raw_parser(
+                        raw=raw_title, settings=self.settings, rss_link=rss_link, _id=_id
+                    )
+                    if data is not None:
+                        poster_link, official_title = req.get_mikan_info(homepage)
+                        data.poster_link = poster_link
+                        # Official title type
+                        if self.settings.rss_parser.parser_type == "mikan":
+                            data.official_title = official_title
+                        elif self.settings.rss_parser.parser_type == "tmdb":
+                            official_title, year, season = self._title_analyser.tmdb_parser(data.official_title, data.season, self.settings.rss_parser.language)
+                            data.official_title = official_title
+                            data.year = year
+                            data.season = season
+                        else:
+                            pass
+                        new_data.append(data)
+                        _id += 1
+                        logger.debug(f"New title found: {data.official_title}")
+            op.insert_list(new_data)
+            return new_data

-    def rss_to_data(self, url, _filter: bool = True):
+    def rss_to_data(self, url, _filter: bool = True) -> BangumiData:
         with RequestContent() as req:
             rss_torrents = req.get_torrents(url, _filter)
             for torrent in rss_torrents:
@@ -42,7 +59,12 @@
                 data = self._title_analyser.raw_parser(
                     torrent.name, settings=self.settings, rss_link=url
                 )
-                self.set_rule(data, url)
+                if data is not None:
+                    with DataOperator() as op:
+                        _id = op.gen_id()
+                        data.id = _id
+                        op.insert(data)
+                return data
             except Exception as e:
                 logger.debug(e)
@@ -58,5 +80,4 @@
 if __name__ == '__main__':
     from module.conf import settings, setup_logger
     setup_logger(settings)
     link = "https://mikanani.me/RSS/MyBangumi?token=Td8ceWZZv3s2OZm5ji9RoMer8vk5VS3xzC1Hmg8A26E%3d"
-    with RSSAnalyser(settings) as analyser:
-        analyser.rss_to_datas(link)
+    data = RSSAnalyser(settings).rss_to_datas(link)