From adbe3a7aae503b3e2abd6f59d134ed8cd607604d Mon Sep 17 00:00:00 2001 From: estrella Date: Fri, 16 Jun 2023 21:10:35 +0800 Subject: [PATCH] feat: add rss engine --- backend/src/module/database/__init__.py | 2 + backend/src/module/database/torrent.py | 49 ++++++------ backend/src/module/models/__init__.py | 4 +- backend/src/module/models/rss.py | 19 +++-- .../src/module/network/request_contents.py | 9 +++ backend/src/module/network/site/mikan.py | 4 + backend/src/module/rss/engine.py | 75 +++++++++++++++++++ backend/src/module/rss/poller.py | 26 ------- 8 files changed, 129 insertions(+), 59 deletions(-) create mode 100644 backend/src/module/rss/engine.py delete mode 100644 backend/src/module/rss/poller.py diff --git a/backend/src/module/database/__init__.py b/backend/src/module/database/__init__.py index c5d59b08..cd90685d 100644 --- a/backend/src/module/database/__init__.py +++ b/backend/src/module/database/__init__.py @@ -1 +1,3 @@ from .bangumi import BangumiDatabase +from .rss import RSSDatabase +from .torrent import TorrentDatabase \ No newline at end of file diff --git a/backend/src/module/database/torrent.py b/backend/src/module/database/torrent.py index bbdf0f8c..2821182f 100644 --- a/backend/src/module/database/torrent.py +++ b/backend/src/module/database/torrent.py @@ -1,17 +1,24 @@ import logging -from .connector import DataConnector +from .orm import Connector +from module.models import TorrentData +from module.conf import DATA_PATH logger = logging.getLogger(__name__) -class TorrentDatabase(DataConnector): - def update_table(self): - table_name = "torrent" - db_data = self.__data_to_db() - self._update_table(table_name, db_data) +class TorrentDatabase(Connector): + def __init__(self, database: str = DATA_PATH): + super().__init__( + table_name="torrent", + data=TorrentData().dict(), + database=DATA_PATH + ) - def __data_to_db(self, data: SaveTorrent): + def update_table(self): + self.update.table() + + def __data_to_db(self, data: TorrentData) -> dict: db_data = data.dict() for key, value in db_data.items(): if isinstance(value, bool): @@ -20,28 +27,18 @@ class TorrentDatabase(DataConnector): db_data[key] = ",".join(value) return db_data - def __db_to_data(self, db_data: dict): + def __db_to_data(self, db_data: dict) -> TorrentData: for key, item in db_data.items(): if isinstance(item, int): - if key not in ["id", "offset", "season", "year"]: - db_data[key] = bool(item) + db_data[key] = bool(item) elif key in ["filter", "rss_link"]: db_data[key] = item.split(",") - return SaveTorrent(**db_data) + return TorrentData(**db_data) - def if_downloaded(self, torrent_url: str, torrent_name: str) -> bool: - self._cursor.execute( - "SELECT * FROM torrent WHERE torrent_url = ? OR torrent_name = ?", - (torrent_url, torrent_name), - ) - return bool(self._cursor.fetchone()) + def insert_many(self, data_list: list[TorrentData]): + dict_datas = [self.__data_to_db(data) for data in data_list] + self.insert.many(dict_datas) - def insert(self, data: SaveTorrent): - db_data = self.__data_to_db(data) - columns = ", ".join(db_data.keys()) - values = ", ".join([f":{key}" for key in db_data.keys()]) - self._cursor.execute( - f"INSERT INTO torrent ({columns}) VALUES ({values})", db_data - ) - logger.debug(f"Add {data.torrent_name} into database.") - self._conn.commit() + def get_all(self) -> list[TorrentData]: + dict_datas = self.select.all() + return [self.__db_to_data(data) for data in dict_datas] diff --git a/backend/src/module/models/__init__.py b/backend/src/module/models/__init__.py index a73f18ed..c2a2599e 100644 --- a/backend/src/module/models/__init__.py +++ b/backend/src/module/models/__init__.py @@ -1,5 +1,5 @@ -from .bangumi import * +from .bangumi import BangumiData from .config import Config -from .rss import RSSTorrents +from .rss import RSSItem, TorrentData from .torrent import EpisodeFile, SubtitleFile, TorrentBase from .user import UserLogin diff --git a/backend/src/module/models/rss.py b/backend/src/module/models/rss.py index c763c4c5..66f38292 100644 --- a/backend/src/module/models/rss.py +++ b/backend/src/module/models/rss.py @@ -1,9 +1,18 @@ from pydantic import BaseModel, Field -class RSSTorrents(BaseModel): - name: str = Field(..., alias="item_path") +class RSSItem(BaseModel): + id: int = Field(0, alias="id", title="id") + item_path: str = Field("example path", alias="item_path") + url: str = Field("https://mikanani.me", alias="url") + combine: bool = Field(True, alias="combine") + enabled: bool = Field(True, alias="enabled") + + +class TorrentData(BaseModel): + id: int = Field(0, alias="id") + name: str = Field(..., alias="name") url: str = Field(..., alias="url") - analyze: bool = Field(..., alias="analyze") - enabled: bool = Field(..., alias="enabled") - torrents: list[str] = Field(..., alias="torrents") + matched: bool = Field(..., alias="matched") + downloaded: bool = Field(..., alias="downloaded") + save_path: str = Field(..., alias="save_path") diff --git a/backend/src/module/network/request_contents.py b/backend/src/module/network/request_contents.py index fcef7774..7f00a1d1 100644 --- a/backend/src/module/network/request_contents.py +++ b/backend/src/module/network/request_contents.py @@ -95,3 +95,12 @@ class RequestContent(RequestURL): def check_connection(self, _url): return self.check_url(_url) + + def get_rss_title(self, _url): + soup = self.get_xml(_url) + return soup.find("title").text + + +if __name__ == '__main__': + with RequestContent() as req: + req.get_xml("https://mikanani.me/RSS/Classic") diff --git a/backend/src/module/network/site/mikan.py b/backend/src/module/network/site/mikan.py index 0ad4314b..ce6595e7 100644 --- a/backend/src/module/network/site/mikan.py +++ b/backend/src/module/network/site/mikan.py @@ -7,3 +7,7 @@ def mikan_parser(soup): torrent_urls.append(item.find("enclosure").attrib["url"]) torrent_homepage.append(item.find("link").text) return torrent_titles, torrent_urls, torrent_homepage + + +def mikan_title(soup): + return soup.find("title").text diff --git a/backend/src/module/rss/engine.py b/backend/src/module/rss/engine.py new file mode 100644 index 00000000..8befad0c --- /dev/null +++ b/backend/src/module/rss/engine.py @@ -0,0 +1,75 @@ +import re + +from module.database import RSSDatabase, BangumiDatabase, TorrentDatabase +from module.models import BangumiData, RSSItem, TorrentData +from module.network import RequestContent, TorrentInfo + + +class RSSEngine(RequestContent): + + @staticmethod + def _get_rss_items() -> list[RSSItem]: + with RSSDatabase() as db: + return db.get_all() + + @staticmethod + def _get_bangumi_data(rss_link: str) -> list[BangumiData]: + with BangumiDatabase() as db: + return db.get_rss_data(rss_link) + + def add_rss(self, rss_link: str, name: str, combine: bool): + if not name: + name = self.get_rss_title(rss_link) + insert_data = RSSItem(item_path=name, url=rss_link, combine=combine) + with RSSDatabase() as db: + db.insert_one(insert_data) + + def pull_rss(self, rss_item: RSSItem) -> list[TorrentInfo]: + torrents = self.get_torrents(rss_item.url) + return torrents + + @staticmethod + def match_torrent(torrent: TorrentInfo) -> TorrentData | None: + with BangumiDatabase() as db: + bangumi_data = db.match_torrent(torrent.name) + if bangumi_data: + _filter = "|".join(bangumi_data.filter) + if re.search(_filter, torrent.name): + return None + else: + return TorrentData( + name=torrent.name, + url=torrent.torrent_link, + ) + return None + + @staticmethod + def filter_torrent(torrents: list[TorrentInfo]) -> list[TorrentInfo]: + with TorrentDatabase() as db: + in_db_torrents = db.get_all() + in_db_torrents = [x.name for x in in_db_torrents] + i = 0 + while i < len(torrents): + torrent = torrents[i] + if torrent.name in in_db_torrents: + torrents.pop(i) + i += 1 + return torrents + + def run(self): + # Get All RSS Items + rss_items: list[RSSItem] = self._get_rss_items() + # From RSS Items, get all torrents + for rss_item in rss_items: + torrents = self.get_torrents(rss_item.url) + self.filter_torrent(torrents) + # Get all enabled bangumi data + matched_torrents = [] + for torrent in torrents: + matched_torrent = self.match_torrent(torrent) + if matched_torrent: + matched_torrents.append(matched_torrent) + # Add to database + with TorrentDatabase() as db: + db.insert_many(matched_torrents) + return matched_torrents diff --git a/backend/src/module/rss/poller.py b/backend/src/module/rss/poller.py deleted file mode 100644 index 90a81cb0..00000000 --- a/backend/src/module/rss/poller.py +++ /dev/null @@ -1,26 +0,0 @@ -import re - -from module.database import RSSDatabase -from module.models import BangumiData, RSSTorrents -from module.network import RequestContent, TorrentInfo - - -class RSSPoller(RSSDatabase): - @staticmethod - def polling(rss_link, req: RequestContent) -> list[TorrentInfo]: - return req.get_torrents(rss_link) - - @staticmethod - def filter_torrent(data: BangumiData, torrent: TorrentInfo) -> bool: - if data.title_raw in torrent.name: - _filter = "|".join(data.filter) - if not re.search(_filter, torrent.name): - return True - else: - return False - - def foo(self): - rss_datas: list[RSSTorrents] = self.get_rss_data() - with RequestContent() as req: - for rss_data in rss_datas: - self.polling(rss_data.url, req)