From 80ea9ec65704e9b8e22de01c51559203a118a72b Mon Sep 17 00:00:00 2001 From: EstrellaXD Date: Fri, 4 Aug 2023 19:02:29 +0800 Subject: [PATCH] feat: Complete RSS engine. --- backend/src/module/database/bangumi.py | 18 ++-- backend/src/module/database/combine.py | 4 +- backend/src/module/database/rss.py | 22 ++++- backend/src/module/database/torrent.py | 10 +-- backend/src/module/models/torrent.py | 1 + .../src/module/network/request_contents.py | 9 +- backend/src/module/rss/engine.py | 83 +++++++------------ 7 files changed, 68 insertions(+), 79 deletions(-) diff --git a/backend/src/module/database/bangumi.py b/backend/src/module/database/bangumi.py index 64de1994..a432dc94 100644 --- a/backend/src/module/database/bangumi.py +++ b/backend/src/module/database/bangumi.py @@ -1,6 +1,6 @@ import logging -from sqlmodel import Session, select, delete, or_ +from sqlmodel import Session, select, delete, or_, and_ from sqlalchemy.sql import func from typing import Optional @@ -87,7 +87,9 @@ class BangumiDatabase: def match_poster(self, bangumi_name: str) -> str: # Use like to match - statement = select(Bangumi).where(func.instr(bangumi_name, Bangumi.title_raw) > 0) + statement = select(Bangumi).where( + func.instr(bangumi_name, Bangumi.title_raw) > 0 + ) data = self.session.exec(statement).first() if data: return data.poster_link @@ -115,6 +117,12 @@ class BangumiDatabase: i += 1 return torrent_list + def match_torrent(self, torrent_name: str) -> Optional[Bangumi]: + statement = select(Bangumi).where( + and_(func.instr(torrent_name, Bangumi.title_raw) > 0, not Bangumi.deleted) + ) + return self.session.exec(statement).first() + def not_complete(self) -> list[Bangumi]: # Find eps_complete = False condition = select(Bangumi).where(Bangumi.eps_collect == 0) @@ -140,7 +148,5 @@ class BangumiDatabase: logger.debug(f"[Database] Disable rule {bangumi.title_raw}.") def search_rss(self, rss_link: str) -> list[Bangumi]: - statement = select(Bangumi).where( - func.instr(rss_link, Bangumi.rss_link) > 0 - ) - return self.session.exec(statement).all() \ No newline at end of file + statement = select(Bangumi).where(func.instr(rss_link, Bangumi.rss_link) > 0) + return self.session.exec(statement).all() diff --git a/backend/src/module/database/combine.py b/backend/src/module/database/combine.py index 20c2e32e..9066f378 100644 --- a/backend/src/module/database/combine.py +++ b/backend/src/module/database/combine.py @@ -1,4 +1,4 @@ -from sqlmodel import Session,SQLModel +from sqlmodel import Session, SQLModel from .engine import engine from .rss import RSSDatabase @@ -15,4 +15,4 @@ class Database(Session): @staticmethod def create_table(): - SQLModel.metadata.create_all(engine) \ No newline at end of file + SQLModel.metadata.create_all(engine) diff --git a/backend/src/module/database/rss.py b/backend/src/module/database/rss.py index 72b08a0f..bc539280 100644 --- a/backend/src/module/database/rss.py +++ b/backend/src/module/database/rss.py @@ -12,7 +12,20 @@ class RSSDatabase: def __init__(self, session: Session): self.session = session - def insert_one(self, data: RSSItem): + def add(self, data: RSSItem): + # Check if exists + statement = select(RSSItem).where(RSSItem.url == data.url) + db_data = self.session.exec(statement).first() + if db_data: + logger.debug(f"RSS Item {data.url} already exists.") + return + else: + logger.debug(f"RSS Item {data.url} not exists, adding...") + self.session.add(data) + self.session.commit() + self.session.refresh(data) + + def update(self, data: RSSItem): self.session.add(data) self.session.commit() self.session.refresh(data) @@ -20,7 +33,10 @@ class RSSDatabase: def search_all(self) -> list[RSSItem]: return self.session.exec(select(RSSItem)).all() - def delete_one(self, _id: int): + def search_active(self) -> list[RSSItem]: + return self.session.exec(select(RSSItem).where(RSSItem.enabled)).all() + + def delete(self, _id: int): condition = delete(RSSItem).where(RSSItem.id == _id) self.session.exec(condition) self.session.commit() @@ -29,5 +45,3 @@ class RSSDatabase: condition = delete(RSSItem) self.session.exec(condition) self.session.commit() - - diff --git a/backend/src/module/database/torrent.py b/backend/src/module/database/torrent.py index e6fc2d02..2c32dfca 100644 --- a/backend/src/module/database/torrent.py +++ b/backend/src/module/database/torrent.py @@ -11,13 +11,13 @@ class TorrentDatabase: def __init__(self, session: Session): self.session = session - def insert_one(self, data: Torrent): + def add(self, data: Torrent): self.session.add(data) self.session.commit() self.session.refresh(data) logger.debug(f"Insert {data.name} in database.") - def insert_many(self, datas: list[Torrent]): + def add_all(self, datas: list[Torrent]): self.session.add_all(datas) self.session.commit() logger.debug(f"Insert {len(datas)} torrents in database.") @@ -46,9 +46,9 @@ class TorrentDatabase: def check_new(self, torrents_list: list[Torrent]) -> list[Torrent]: new_torrents = [] + old_torrents = self.search_all() + old_urls = [t.url for t in old_torrents] for torrent in torrents_list: - statement = select(Torrent).where(Torrent.name == torrent.name) - db_torrent = self.session.exec(statement).first() - if not db_torrent: + if torrent.url not in old_urls: new_torrents.append(torrent) return new_torrents diff --git a/backend/src/module/models/torrent.py b/backend/src/module/models/torrent.py index e44f6104..1e6198d7 100644 --- a/backend/src/module/models/torrent.py +++ b/backend/src/module/models/torrent.py @@ -9,6 +9,7 @@ class Torrent(SQLModel, table=True): name: str = Field("", alias="name") url: str = Field("https://example.com/torrent", alias="url") homepage: Optional[str] = Field(None, alias="homepage") + save_path: Optional[str] = Field(None, alias="saved_path") downloaded: bool = Field(False, alias="downloaded") diff --git a/backend/src/module/network/request_contents.py b/backend/src/module/network/request_contents.py index c42018f0..c2e865fa 100644 --- a/backend/src/module/network/request_contents.py +++ b/backend/src/module/network/request_contents.py @@ -97,11 +97,4 @@ class RequestContent(RequestURL): def get_rss_title(self, _url): soup = self.get_xml(_url) - return soup.find("title").text - - -if __name__ == '__main__': - with RequestContent() as req: - ts = req.get_torrents("https://mikanani.me/RSS/Classic") - for t in ts: - print(t) + return soup.find("./channel/title").text diff --git a/backend/src/module/rss/engine.py b/backend/src/module/rss/engine.py index da68c701..1c775ce7 100644 --- a/backend/src/module/rss/engine.py +++ b/backend/src/module/rss/engine.py @@ -1,20 +1,16 @@ import re +import logging -from module.database import RSSDatabase, BangumiDatabase, TorrentDatabase from module.models import Bangumi, RSSItem, Torrent from module.network import RequestContent, TorrentInfo +from module.downloader import DownloadClient from module.database.combine import Database class RSSEngine(Database): - def _get_rss_items(self) -> list[RSSItem]: - return self.rss.search_all() - - def _get_bangumi_data(self, rss_link: str) -> list[Bangumi]: - return self.bangumi.search_rss(rss_link) - - def get_torrent(self, rss_link: str) -> list[Torrent]: + @staticmethod + def _get_torrents(rss_link: str) -> list[Torrent]: with RequestContent() as req: torrent_infos = req.get_torrents(rss_link) torrents: list[Torrent] = [] @@ -28,62 +24,41 @@ class RSSEngine(Database): ) return torrents - def check_new_torrents(self, torrents_list: list[list[Torrent]]) -> list[Torrent]: - return self.torrent.check_new(torrents_list) - - def add_rss(self, rss_link: str, name: str, combine: bool): + def add_rss(self, rss_link: str, name: str | None = None, combine: bool = True): if not name: - name = self.get_rss_title(rss_link) - insert_data = RSSItem(item_path=name, url=rss_link, combine=combine) - with RSSDatabase() as db: - db.insert_one(insert_data) + with RequestContent() as req: + name = req.get_rss_title(rss_link) + rss_data = RSSItem(item_path=name, url=rss_link, combine=combine) + self.rss.add(rss_data) - def pull_rss(self, rss_item: RSSItem) -> list[TorrentInfo]: - torrents = self.get_torrents(rss_item.url) - return torrents - - @staticmethod - def match_torrent(torrent: TorrentInfo, rss_link: str) -> TorrentData | None: - with BangumiDatabase() as db: - bangumi_data = db.match_torrent(torrent.name, rss_link) - if bangumi_data: - _filter = "|".join(bangumi_data.filter) - if re.search(_filter, torrent.name): - return None - else: - return TorrentData( - name=torrent.name, - url=torrent.torrent_link, - ) - return None - - @staticmethod - def filter_torrent(torrents: list[TorrentInfo]) -> list[TorrentInfo]: - new_torrents = [] - with TorrentDatabase() as db: - in_db_torrents: list = db.get_torrent_name() - for torrent in torrents: - if torrent.name not in in_db_torrents: - new_torrents.append(torrent) + def pull_rss(self, rss_item: RSSItem) -> list[Torrent]: + torrents = self._get_torrents(rss_item.url) + new_torrents = self.torrent.check_new(torrents) return new_torrents + def match_torrent(self, torrent: Torrent): + matched: Bangumi = self.bangumi.match_torrent(torrent.name) + if matched: + torrent.refer_id = matched.id + torrent.save_path = matched.save_path + with RequestContent() as req: + torrent_file = req.get_content(torrent.url) + with DownloadClient() as client: + client.add_torrent( + {"torrent_files": torrent_file, "save_path": torrent.save_path} + ) + torrent.downloaded = True + def run(self): # Get All RSS Items - rss_items: list[RSSItem] = self._get_rss_items() + rss_items: list[RSSItem] = self.rss.search_active() # From RSS Items, get all torrents for rss_item in rss_items: - torrents = self.get_torrents(rss_item.url) - new_torrents = self.filter_torrent(torrents) + new_torrents = self.pull_rss(rss_item) # Get all enabled bangumi data - matched_torrents = [] for torrent in new_torrents: - matched_torrent = self.match_torrent(torrent) - if matched_torrent: - matched_torrents.append(matched_torrent) - # Add to database - with TorrentDatabase() as db: - db.insert_many(matched_torrents) - return matched_torrents + self.match_torrent(torrent) + self.torrent.add_all(new_torrents) if __name__ == "__main__":