From 4d956f70a0e21f162b98c7265dc0edcef842409d Mon Sep 17 00:00:00 2001 From: EstrellaXD Date: Sun, 7 May 2023 23:31:18 +0800 Subject: [PATCH] Migrate rssanalyser from json to sqlite --- src/module/app.py | 8 +-- src/module/core/api_func.py | 2 +- src/module/database/connector.py | 7 +- src/module/database/operator.py | 100 ++++++++++++++++++++--------- src/module/manager/eps_complete.py | 5 +- src/module/models/bangumi.py | 4 +- src/module/parser/title_parser.py | 4 +- src/module/rss/rss_analyser.py | 29 +++++---- 8 files changed, 104 insertions(+), 55 deletions(-) diff --git a/src/module/app.py b/src/module/app.py index 11cee9b6..0c5613ab 100644 --- a/src/module/app.py +++ b/src/module/app.py @@ -1,11 +1,9 @@ import os -import time import logging import asyncio -from module.conf import setup_logger, LOG_PATH, RSSLink, VERSION +from module.conf import LOG_PATH, VERSION -from module.core import DownloadClient from module.manager import Renamer, FullSeasonGet from module.rss import RSSAnalyser from module.models import Config @@ -23,8 +21,8 @@ async def rss_loop( rss_link: str, settings: Config, ): - with RSSAnalyser(settings) as rss: - rss.rss_to_datas(rss_link) + with RSSAnalyser(settings) as analyser: + analyser.rss_to_datas(rss_link) if settings.bangumi_manage.eps_complete: with FullSeasonGet(settings) as season: season.eps_complete() diff --git a/src/module/core/api_func.py b/src/module/core/api_func.py index 69bc482a..299fd821 100644 --- a/src/module/core/api_func.py +++ b/src/module/core/api_func.py @@ -6,7 +6,7 @@ from module.manager import FullSeasonGet from module.rss import RSSAnalyser from module.utils import json_config from module.conf import DATA_PATH -from module.conf.config import save_config_to_file, CONFIG_PATH +from module.conf.config import CONFIG_PATH from module.models import Config from module.network import RequestContent diff --git a/src/module/database/connector.py b/src/module/database/connector.py index b6bdb692..135071a3 100644 --- 
a/src/module/database/connector.py +++ b/src/module/database/connector.py @@ -16,17 +16,18 @@ class DataConnector: CREATE TABLE IF NOT EXISTS bangumi ( id INTEGER PRIMARY KEY, official_title TEXT NOT NULL, + year INTEGER, title_raw TEXT NOT NULL, season INTEGER NOT NULL, season_raw TEXT NOT NULL, - subtitle TEXT, group_name TEXT, - source TEXT, dpi TEXT, + source TEXT, + subtitle TEXT, eps_collect INTEGER NOT NULL, offset INTEGER NOT NULL, filter TEXT NOT NULL, - rss TEXT NOT NULL + rss_link TEXT NOT NULL ); """ ) diff --git a/src/module/database/operator.py b/src/module/database/operator.py index 41069079..26f9a109 100644 --- a/src/module/database/operator.py +++ b/src/module/database/operator.py @@ -1,8 +1,10 @@ +import logging + from module.database.connector import DataConnector - - from module.models import BangumiData +logger = logging.getLogger(__name__) + class DataOperator(DataConnector): @staticmethod @@ -21,7 +23,7 @@ class DataOperator(DataConnector): if isinstance(item, int): if key not in ["id", "offset", "season"]: db_data[key] = bool(item) - elif key in ["filter", "rss"]: + elif key in ["filter", "rss_link"]: db_data[key] = item.split(",") return BangumiData(**db_data) @@ -32,35 +34,38 @@ class DataOperator(DataConnector): INSERT INTO bangumi ( id, official_title, + year, title_raw, season, season_raw, - subtitle, group_name, - source, dpi, + source, + subtitle, eps_collect, offset, filter, - rss + rss_link ) VALUES ( :id, :official_title, + :year, :title_raw, :season, :season_raw, - :subtitle, :group, - :source, :dpi, + :source, + :subtitle, :eps_collect, :offset, :filter, - :rss + :rss_link ) """, db_data, ) + logger.debug(f"Add {data.official_title} into database.") self._conn.commit() def insert_list(self, data: list[BangumiData]): @@ -70,31 +75,33 @@ class DataOperator(DataConnector): INSERT INTO bangumi ( id, official_title, + year, title_raw, season, season_raw, - subtitle, group_name, - source, dpi, + source, + subtitle, eps_collect, offset, 
filter, - rss + rss_link ) VALUES ( :id, :official_title, + :year, :title_raw, :season, :season_raw, - :subtitle, :group, - :source, :dpi, + :source, + :subtitle, :eps_collect, :offset, :filter, - :rss + :rss_link ) """, db_data, @@ -124,6 +131,19 @@ class DataOperator(DataConnector): self._conn.commit() return self._cursor.rowcount == 1 + def update_rss(self, title_raw, rss_set: list[str]): + self._cursor.execute( + """ + UPDATE bangumi SET + rss_link = :rss_link + WHERE title_raw = :title_raw + """, + {"rss_link": ",".join(rss_set), "title_raw": title_raw}, + ) + self._conn.commit() + logger.info(f"Update {title_raw} rss_link to {rss_set}.") + return self._cursor.rowcount == 1 + def search_id(self, _id: int) -> BangumiData | None: self._cursor.execute( """ @@ -156,40 +176,60 @@ class DataOperator(DataConnector): # Select all title_raw self._cursor.execute( """ - SELECT title_raw FROM bangumi + SELECT official_title FROM bangumi """ ) - title_raws = [x[0] for x in self._cursor.fetchall()] + db_titles = [x[0] for x in self._cursor.fetchall()] # Match title - for title_raw in title_raws: - if title_raw in title: + for db_title in db_titles: + if title == db_title: return True return False - def not_exist_titles(self, titles: list[str]) -> list[str]: + def not_exist_titles(self, titles: list[str], rss_link) -> list[str]: # Select all title_raw self._cursor.execute( """ - SELECT title_raw FROM bangumi + SELECT title_raw, rss_link FROM bangumi """ ) - title_raws = [x[0] for x in self._cursor.fetchall()] + data = self._cursor.fetchall() + if not data: + return titles # Match title - for title_raw in title_raws: + for title_raw, rss_set in data: + rss_set = rss_set.split(",") for title in titles: - if title_raw in title: - titles.remove(title) + if rss_link in rss_set: + if title_raw in title: + titles.remove(title) + elif rss_link not in rss_set: + rss_set.append(rss_link) + self.update_rss(title_raw, rss_set) return titles + def get_uncompleted(self) -> 
list[BangumiData] | None: + # Find eps_collect = True + self._cursor.execute( + """ + SELECT * FROM bangumi WHERE eps_collect == 1 + """ + ) + values = self._cursor.fetchall() + if values is None: + return None + keys = [x[0] for x in self._cursor.description] + dict_data = [dict(zip(keys, value)) for value in values] + return [self.db_to_data(x) for x in dict_data] + def gen_id(self) -> int: self._cursor.execute( """ SELECT id FROM bangumi ORDER BY id DESC LIMIT 1 """ ) - return self._cursor.fetchone()[0] + 1 + data = self._cursor.fetchone() + if data is None: + return 1 + return data[0] + 1 - -if __name__ == "__main__": - with DataOperator() as op: - pass diff --git a/src/module/manager/eps_complete.py b/src/module/manager/eps_complete.py index f22fc429..236b020b 100644 --- a/src/module/manager/eps_complete.py +++ b/src/module/manager/eps_complete.py @@ -6,6 +6,7 @@ from module.network import RequestContent from module.core import DownloadClient from module.models import BangumiData, Config +from module.database import DataOperator logger = logging.getLogger(__name__) @@ -71,7 +72,9 @@ class FullSeasonGet(DownloadClient): logger.info("Completed!") data.eps_collect = False - def eps_complete(self, datas: list[BangumiData]): + def eps_complete(self): + with DataOperator() as op: + datas = op.get_uncompleted() for data in datas: if data.eps_collect: self.download_season(data) diff --git a/src/module/models/bangumi.py b/src/module/models/bangumi.py index 4eca2c11..afeedc96 100644 --- a/src/module/models/bangumi.py +++ b/src/module/models/bangumi.py @@ -16,8 +16,8 @@ class BangumiData(BaseModel): eps_collect: bool = Field(False, alias="eps_collect", title="是否已收集") offset: int = Field(0, alias="offset", title="番剧偏移量") filter: list[str] = Field(..., alias="filter", title="番剧过滤器") - rss: list[str] = Field(None, alias="rss", title="番剧RSS链接") - poster_link: str | None = Field(None, alias="poster_link", title="番剧海报链接") + rss_link: list[str] = Field(..., alias="rss_link", 
title="番剧RSS链接") + # poster_link: str | None = Field(None, alias="poster_link", title="番剧海报链接") class ProgramData(BaseModel): diff --git a/src/module/parser/title_parser.py b/src/module/parser/title_parser.py index aa412e6b..fd763192 100644 --- a/src/module/parser/title_parser.py +++ b/src/module/parser/title_parser.py @@ -39,7 +39,7 @@ class TitleParser: official_title = official_title if official_title else title return official_title, tmdb_season - def raw_parser(self, raw: str, settings: Config, _id: int = 0) -> BangumiData: + def raw_parser(self, raw: str, rss_link: str, settings: Config, _id: int = 0) -> BangumiData: language = settings.rss_parser.language try: episode = raw_parser(raw) @@ -70,7 +70,7 @@ class TitleParser: eps_collect=True if episode.episode > 1 else False, offset=0, filter=settings.rss_parser.filter, - rss=rss_link, + rss_link=[rss_link], ) logger.debug(f"RAW:{raw} >> {episode.title_en}") return data diff --git a/src/module/rss/rss_analyser.py b/src/module/rss/rss_analyser.py index d7888873..a4e25457 100644 --- a/src/module/rss/rss_analyser.py +++ b/src/module/rss/rss_analyser.py @@ -1,9 +1,8 @@ -import re import logging from module.network import RequestContent from module.parser import TitleParser -from module.models import BangumiData, Config +from module.models import Config from module.database import DataOperator from module.core import DownloadClient @@ -16,24 +15,24 @@ class RSSAnalyser(DownloadClient): self._title_analyser = TitleParser() self.settings = settings - def rss_to_datas(self, rss_link: str) -> list[BangumiData]: + def rss_to_datas(self, rss_link: str): with RequestContent() as req: rss_torrents = req.get_torrents(rss_link) title_list = [torrent.name for torrent in rss_torrents] - data_list = [] with DataOperator() as op: - add_title_list = op.not_exist_titles(title_list) + add_title_list = op.not_exist_titles(title_list, rss_link) + if not add_title_list: + logger.debug("No new title found.") + return _id = op.gen_id() 
for raw_title in add_title_list: data = self._title_analyser.raw_parser( - raw=raw_title, _id=_id, settings=self.settings + raw=raw_title, _id=_id, settings=self.settings, rss_link=rss_link ) - if data is not None and op.match_title(data.official_title) is None: + if data is not None and not op.match_title(data.official_title): op.insert(data) self.set_rule(data, rss_link) - data_list.append(data) _id += 1 - return data_list def rss_to_data(self, url, _filter: bool = True): with RequestContent() as req: @@ -41,7 +40,7 @@ class RSSAnalyser(DownloadClient): for torrent in rss_torrents: try: data = self._title_analyser.raw_parser( - torrent.name, settings=self.settings + torrent.name, settings=self.settings, rss_link=url ) self.set_rule(data, url) except Exception as e: @@ -50,6 +49,14 @@ class RSSAnalyser(DownloadClient): def run(self, rss_link: str): logger.info("Start collecting RSS info.") try: - return self.rss_to_datas(rss_link) + self.rss_to_datas(rss_link) except Exception as e: logger.debug(e) + + +if __name__ == '__main__': + from module.conf import settings, setup_logger + setup_logger(settings) + link = "https://mikanani.me/RSS/MyBangumi?token=Td8ceWZZv3s2OZm5ji9RoMer8vk5VS3xzC1Hmg8A26E%3d" + with RSSAnalyser(settings) as analyser: + analyser.rss_to_datas(link)