Migrate RSSAnalyser persistence from JSON to SQLite

This commit is contained in:
EstrellaXD
2023-05-07 23:31:18 +08:00
parent d7c9a5f5de
commit 4d956f70a0
8 changed files with 104 additions and 55 deletions

View File

@@ -1,11 +1,9 @@
import os
import time
import logging
import asyncio
from module.conf import setup_logger, LOG_PATH, RSSLink, VERSION
from module.conf import LOG_PATH, VERSION
from module.core import DownloadClient
from module.manager import Renamer, FullSeasonGet
from module.rss import RSSAnalyser
from module.models import Config
@@ -23,8 +21,8 @@ async def rss_loop(
rss_link: str,
settings: Config,
):
with RSSAnalyser(settings) as rss:
rss.rss_to_datas(rss_link)
with RSSAnalyser(settings) as analyser:
analyser.rss_to_datas(rss_link)
if settings.bangumi_manage.eps_complete:
with FullSeasonGet(settings) as season:
season.eps_complete()

View File

@@ -6,7 +6,7 @@ from module.manager import FullSeasonGet
from module.rss import RSSAnalyser
from module.utils import json_config
from module.conf import DATA_PATH
from module.conf.config import save_config_to_file, CONFIG_PATH
from module.conf.config import CONFIG_PATH
from module.models import Config
from module.network import RequestContent

View File

@@ -16,17 +16,18 @@ class DataConnector:
CREATE TABLE IF NOT EXISTS bangumi (
id INTEGER PRIMARY KEY,
official_title TEXT NOT NULL,
year INTEGER,
title_raw TEXT NOT NULL,
season INTEGER NOT NULL,
season_raw TEXT NOT NULL,
subtitle TEXT,
group_name TEXT,
source TEXT,
dpi TEXT,
source TEXT,
subtitle TEXT,
eps_collect INTEGER NOT NULL,
offset INTEGER NOT NULL,
filter TEXT NOT NULL,
rss TEXT NOT NULL
rss_link TEXT NOT NULL
);
"""
)

View File

@@ -1,8 +1,10 @@
import logging
from module.database.connector import DataConnector
from module.models import BangumiData
logger = logging.getLogger(__name__)
class DataOperator(DataConnector):
@staticmethod
@@ -21,7 +23,7 @@ class DataOperator(DataConnector):
if isinstance(item, int):
if key not in ["id", "offset", "season"]:
db_data[key] = bool(item)
elif key in ["filter", "rss"]:
elif key in ["filter", "rss_link"]:
db_data[key] = item.split(",")
return BangumiData(**db_data)
@@ -32,35 +34,38 @@ class DataOperator(DataConnector):
INSERT INTO bangumi (
id,
official_title,
year,
title_raw,
season,
season_raw,
subtitle,
group_name,
source,
dpi,
source,
subtitle,
eps_collect,
offset,
filter,
rss
rss_link
) VALUES (
:id,
:official_title,
:year,
:title_raw,
:season,
:season_raw,
:subtitle,
:group,
:source,
:dpi,
:source,
:subtitle,
:eps_collect,
:offset,
:filter,
:rss
:rss_link
)
""",
db_data,
)
logger.debug(f"Add {data.official_title} into database.")
self._conn.commit()
def insert_list(self, data: list[BangumiData]):
@@ -70,31 +75,33 @@ class DataOperator(DataConnector):
INSERT INTO bangumi (
id,
official_title,
year,
title_raw,
season,
season_raw,
subtitle,
group_name,
source,
dpi,
source,
subtitle,
eps_collect,
offset,
filter,
rss
rss_link
) VALUES (
:id,
:official_title,
:year,
:title_raw,
:season,
:season_raw,
:subtitle,
:group,
:source,
:dpi,
:source,
:subtitle,
:eps_collect,
:offset,
:filter,
:rss
:rss_link
)
""",
db_data,
@@ -124,6 +131,19 @@ class DataOperator(DataConnector):
self._conn.commit()
return self._cursor.rowcount == 1
def update_rss(self, title_raw: str, rss_set: list[str]) -> bool:
    """Overwrite the stored rss_link list for every row matching *title_raw*.

    The ``rss_link`` column stores a comma-separated list of feed URLs,
    so *rss_set* is joined with "," before being written.
    Returns True when exactly one row was updated (cursor.rowcount == 1).
    """
    self._cursor.execute(
        """
        UPDATE bangumi SET
        rss_link = :rss_link
        WHERE title_raw = :title_raw
        """,
        {"rss_link": ",".join(rss_set), "title_raw": title_raw},
    )
    # Persist immediately; each update is its own transaction.
    self._conn.commit()
    logger.info(f"Update {title_raw} rss_link to {rss_set}.")
    return self._cursor.rowcount == 1
def search_id(self, _id: int) -> BangumiData | None:
self._cursor.execute(
"""
@@ -156,40 +176,60 @@ class DataOperator(DataConnector):
# Select all title_raw
self._cursor.execute(
"""
SELECT title_raw FROM bangumi
SELECT official_title FROM bangumi
"""
)
title_raws = [x[0] for x in self._cursor.fetchall()]
db_titles = [x[0] for x in self._cursor.fetchall()]
# Match title
for title_raw in title_raws:
if title_raw in title:
for db_title in db_titles:
if title == db_title:
return True
return False
def not_exist_titles(self, titles: list[str]) -> list[str]:
def not_exist_titles(self, titles: list[str], rss_link) -> list[str]:
# Select all title_raw
self._cursor.execute(
"""
SELECT title_raw FROM bangumi
SELECT title_raw, rss_link FROM bangumi
"""
)
title_raws = [x[0] for x in self._cursor.fetchall()]
data = self._cursor.fetchall()
if not data:
return titles
# Match title
for title_raw in title_raws:
for title_raw, rss_set in data:
rss_set = rss_set.split(",")
for title in titles:
if title_raw in title:
titles.remove(title)
if rss_link in rss_set:
if title_raw in title:
titles.remove(title)
elif rss_link not in rss_set:
rss_set.append(rss_link)
self.update_rss(title_raw, rss_set)
return titles
def get_uncompleted(self) -> list[BangumiData] | None:
    """Return every bangumi row whose ``eps_collect`` flag is 1, as BangumiData.

    NOTE(review): the name and the inline comment suggest "not yet
    complete", while the SQL selects ``eps_collect == 1`` — confirm which
    flag value actually marks a series still needing collection.
    NOTE(review): ``cursor.fetchall()`` returns a list, never ``None``, so
    the ``values is None`` guard can never fire; an empty table yields [].
    """
    # Find eps_complete = False
    self._cursor.execute(
        """
        SELECT * FROM bangumi WHERE eps_collect == 1
        """
    )
    values = self._cursor.fetchall()
    if values is None:
        return None
    # Zip column names from the cursor description onto each row tuple,
    # then convert the dicts into BangumiData models.
    keys = [x[0] for x in self._cursor.description]
    dict_data = [dict(zip(keys, value)) for value in values]
    return [self.db_to_data(x) for x in dict_data]
def gen_id(self) -> int:
self._cursor.execute(
"""
SELECT id FROM bangumi ORDER BY id DESC LIMIT 1
"""
)
return self._cursor.fetchone()[0] + 1
data = self._cursor.fetchone()
if data is None:
return 1
return data[0] + 1
if __name__ == "__main__":
    # Manual smoke test: open (and immediately close) the database
    # connection via the context manager; no operations performed.
    with DataOperator() as op:
        pass

View File

@@ -6,6 +6,7 @@ from module.network import RequestContent
from module.core import DownloadClient
from module.models import BangumiData, Config
from module.database import DataOperator
logger = logging.getLogger(__name__)
@@ -71,7 +72,9 @@ class FullSeasonGet(DownloadClient):
logger.info("Completed!")
data.eps_collect = False
def eps_complete(self, datas: list[BangumiData]):
def eps_complete(self):
with DataOperator() as op:
datas = op.get_uncompleted()
for data in datas:
if data.eps_collect:
self.download_season(data)

View File

@@ -16,8 +16,8 @@ class BangumiData(BaseModel):
eps_collect: bool = Field(False, alias="eps_collect", title="是否已收集")
offset: int = Field(0, alias="offset", title="番剧偏移量")
filter: list[str] = Field(..., alias="filter", title="番剧过滤器")
rss: list[str] = Field(None, alias="rss", title="番剧RSS链接")
poster_link: str | None = Field(None, alias="poster_link", title="番剧海报链接")
rss_link: list[str] = Field(..., alias="rss_link", title="番剧RSS链接")
# poster_link: str | None = Field(None, alias="poster_link", title="番剧海报链接")
class ProgramData(BaseModel):

View File

@@ -39,7 +39,7 @@ class TitleParser:
official_title = official_title if official_title else title
return official_title, tmdb_season
def raw_parser(self, raw: str, settings: Config, _id: int = 0) -> BangumiData:
def raw_parser(self, raw: str, rss_link: str, settings: Config, _id: int = 0) -> BangumiData:
language = settings.rss_parser.language
try:
episode = raw_parser(raw)
@@ -70,7 +70,7 @@ class TitleParser:
eps_collect=True if episode.episode > 1 else False,
offset=0,
filter=settings.rss_parser.filter,
rss=rss_link,
rss_link=[rss_link],
)
logger.debug(f"RAW:{raw} >> {episode.title_en}")
return data

View File

@@ -1,9 +1,8 @@
import re
import logging
from module.network import RequestContent
from module.parser import TitleParser
from module.models import BangumiData, Config
from module.models import Config
from module.database import DataOperator
from module.core import DownloadClient
@@ -16,24 +15,24 @@ class RSSAnalyser(DownloadClient):
self._title_analyser = TitleParser()
self.settings = settings
def rss_to_datas(self, rss_link: str) -> list[BangumiData]:
def rss_to_datas(self, rss_link: str):
with RequestContent() as req:
rss_torrents = req.get_torrents(rss_link)
title_list = [torrent.name for torrent in rss_torrents]
data_list = []
with DataOperator() as op:
add_title_list = op.not_exist_titles(title_list)
add_title_list = op.not_exist_titles(title_list, rss_link)
if not add_title_list:
logger.debug("No new title found.")
return
_id = op.gen_id()
for raw_title in add_title_list:
data = self._title_analyser.raw_parser(
raw=raw_title, _id=_id, settings=self.settings
raw=raw_title, _id=_id, settings=self.settings, rss_link=rss_link
)
if data is not None and op.match_title(data.official_title) is None:
if data is not None and not op.match_title(data.official_title):
op.insert(data)
self.set_rule(data, rss_link)
data_list.append(data)
_id += 1
return data_list
def rss_to_data(self, url, _filter: bool = True):
with RequestContent() as req:
@@ -41,7 +40,7 @@ class RSSAnalyser(DownloadClient):
for torrent in rss_torrents:
try:
data = self._title_analyser.raw_parser(
torrent.name, settings=self.settings
torrent.name, settings=self.settings, rss_link=url
)
self.set_rule(data, url)
except Exception as e:
@@ -50,6 +49,14 @@ class RSSAnalyser(DownloadClient):
def run(self, rss_link: str):
logger.info("Start collecting RSS info.")
try:
return self.rss_to_datas(rss_link)
self.rss_to_datas(rss_link)
except Exception as e:
logger.debug(e)
if __name__ == '__main__':
    # Manual smoke test: run the analyser against a live Mikan RSS feed
    # using the real project settings (performs network I/O and DB writes).
    from module.conf import settings, setup_logger
    setup_logger(settings)
    link = "https://mikanani.me/RSS/MyBangumi?token=Td8ceWZZv3s2OZm5ji9RoMer8vk5VS3xzC1Hmg8A26E%3d"
    with RSSAnalyser(settings) as analyser:
        analyser.rss_to_datas(link)