mirror of
https://github.com/EstrellaXD/Auto_Bangumi.git
synced 2026-04-29 13:02:03 +08:00
优化数据库操作,完善RSS解析器逻辑 (Optimize database operations and refine the RSS parser logic)
This commit is contained in:
@@ -45,7 +45,7 @@ class DownloadClient:
|
||||
prefs = self.client.get_app_prefs()
|
||||
self.download_path = os.path.join(prefs["save_path"], "Bangumi")
|
||||
|
||||
def set_rule(self, info: BangumiData, rss_link):
|
||||
def set_rule(self, info: BangumiData):
|
||||
official_name, raw_name, season, group = (
|
||||
info.official_title,
|
||||
info.title_raw,
|
||||
@@ -60,7 +60,7 @@ class DownloadClient:
|
||||
"episodeFilter": "",
|
||||
"smartFilter": False,
|
||||
"previouslyMatchedEpisodes": [],
|
||||
"affectedFeeds": [rss_link],
|
||||
"affectedFeeds": info.rss_link,
|
||||
"ignoreDays": 0,
|
||||
"lastMatch": "",
|
||||
"addPaused": False,
|
||||
|
||||
@@ -27,7 +27,9 @@ class DataConnector:
|
||||
eps_collect INTEGER NOT NULL,
|
||||
offset INTEGER NOT NULL,
|
||||
filter TEXT NOT NULL,
|
||||
rss_link TEXT NOT NULL
|
||||
rss_link TEXT NOT NULL,
|
||||
poster_link TEXT,
|
||||
added INTEGER NOT NULL
|
||||
);
|
||||
"""
|
||||
)
|
||||
|
||||
@@ -21,7 +21,7 @@ class DataOperator(DataConnector):
|
||||
def db_to_data(db_data: dict) -> BangumiData:
|
||||
for key, item in db_data.items():
|
||||
if isinstance(item, int):
|
||||
if key not in ["id", "offset", "season"]:
|
||||
if key not in ["id", "offset", "season", "year"]:
|
||||
db_data[key] = bool(item)
|
||||
elif key in ["filter", "rss_link"]:
|
||||
db_data[key] = item.split(",")
|
||||
@@ -45,7 +45,9 @@ class DataOperator(DataConnector):
|
||||
eps_collect,
|
||||
offset,
|
||||
filter,
|
||||
rss_link
|
||||
rss_link,
|
||||
poster_link,
|
||||
added
|
||||
) VALUES (
|
||||
:id,
|
||||
:official_title,
|
||||
@@ -60,7 +62,9 @@ class DataOperator(DataConnector):
|
||||
:eps_collect,
|
||||
:offset,
|
||||
:filter,
|
||||
:rss_link
|
||||
:rss_link,
|
||||
:poster_link,
|
||||
:added
|
||||
)
|
||||
""",
|
||||
db_data,
|
||||
@@ -86,7 +90,9 @@ class DataOperator(DataConnector):
|
||||
eps_collect,
|
||||
offset,
|
||||
filter,
|
||||
rss_link
|
||||
rss_link,
|
||||
poster_link,
|
||||
added
|
||||
) VALUES (
|
||||
:id,
|
||||
:official_title,
|
||||
@@ -101,7 +107,9 @@ class DataOperator(DataConnector):
|
||||
:eps_collect,
|
||||
:offset,
|
||||
:filter,
|
||||
:rss_link
|
||||
:rss_link,
|
||||
:poster_link,
|
||||
:added
|
||||
)
|
||||
""",
|
||||
db_data,
|
||||
@@ -125,7 +133,9 @@ class DataOperator(DataConnector):
|
||||
eps_collect = :eps_collect,
|
||||
offset = :offset,
|
||||
filter = :filter,
|
||||
rss_link = :rss_link
|
||||
rss_link = :rss_link,
|
||||
poster_link = :poster_link,
|
||||
added = :added
|
||||
WHERE id = :id
|
||||
""",
|
||||
db_data,
|
||||
@@ -133,18 +143,18 @@ class DataOperator(DataConnector):
|
||||
self._conn.commit()
|
||||
return self._cursor.rowcount == 1
|
||||
|
||||
def update_rss(self, title_raw, rss_set: list[str]):
|
||||
def update_column(self, title_raw: str, column: str, value: str):
|
||||
|
||||
def update_rss(self, title_raw, rss_set: str):
|
||||
# Update rss and select all data
|
||||
self._cursor.execute(
|
||||
"""
|
||||
UPDATE bangumi SET
|
||||
rss_link = :rss_link
|
||||
WHERE title_raw = :title_raw
|
||||
UPDATE bangumi SET rss_link = :rss_link WHERE title_raw = :title_raw
|
||||
""",
|
||||
{"rss_link": ",".join(rss_set), "title_raw": title_raw},
|
||||
{"rss_link": rss_set, "title_raw": title_raw},
|
||||
)
|
||||
self._conn.commit()
|
||||
logger.info(f"Update {title_raw} rss_link to {rss_set}.")
|
||||
return self._cursor.rowcount == 1
|
||||
|
||||
def search_id(self, _id: int) -> BangumiData | None:
|
||||
self._cursor.execute(
|
||||
@@ -194,7 +204,7 @@ class DataOperator(DataConnector):
|
||||
)
|
||||
return self._cursor.fetchone() is not None
|
||||
|
||||
def match_list(self, title_dict: dict) -> dict:
|
||||
def match_list(self, title_dict: dict, rss_link: str) -> dict:
|
||||
# Match title_raw in database
|
||||
self._cursor.execute(
|
||||
"""
|
||||
@@ -203,15 +213,17 @@ class DataOperator(DataConnector):
|
||||
)
|
||||
data = self._cursor.fetchall()
|
||||
if not data:
|
||||
return {}
|
||||
return title_dict
|
||||
# Match title
|
||||
for title, rss_link in title_dict.items():
|
||||
for title in title_dict.copy().keys():
|
||||
for title_raw, rss_set in data:
|
||||
if rss_link in rss_set and title_raw in title:
|
||||
del title_dict[title]
|
||||
elif rss_link not in rss_set and title_raw in title:
|
||||
# TODO: Logic problem
|
||||
break
|
||||
if title_raw in title:
|
||||
if rss_link in rss_set:
|
||||
title_dict.pop(title)
|
||||
else:
|
||||
rss_set += "," + rss_link
|
||||
self.update_rss(title_raw, rss_set)
|
||||
break
|
||||
return title_dict
|
||||
|
||||
def not_exist_titles(self, titles: list[str], rss_link) -> list[str]:
|
||||
|
||||
@@ -17,7 +17,8 @@ class BangumiData(BaseModel):
|
||||
offset: int = Field(0, alias="offset", title="番剧偏移量")
|
||||
filter: list[str] = Field(..., alias="filter", title="番剧过滤器")
|
||||
rss_link: list[str] = Field(..., alias="rss_link", title="番剧RSS链接")
|
||||
# poster_link: str | None = Field(None, alias="poster_link", title="番剧海报链接")
|
||||
poster_link: str | None = Field(None, alias="poster_link", title="番剧海报链接")
|
||||
added: bool = Field(False, alias="added", title="是否已添加")
|
||||
|
||||
|
||||
class Notification(BaseModel):
|
||||
|
||||
@@ -13,6 +13,7 @@ FILTER = "|".join(settings.rss_parser.filter)
|
||||
class TorrentInfo:
|
||||
name: str
|
||||
torrent_link: str
|
||||
homepage: str = None
|
||||
|
||||
|
||||
class RequestContent(RequestURL):
|
||||
@@ -29,22 +30,24 @@ class RequestContent(RequestURL):
|
||||
torrent_homepage.append(item.find("link").text)
|
||||
|
||||
torrents = []
|
||||
for _title, torrent_url in zip(torrent_titles, torrent_urls):
|
||||
for _title, torrent_url, homepage in zip(torrent_titles, torrent_urls, torrent_homepage):
|
||||
if _filter:
|
||||
if re.search(FILTER, _title) is None:
|
||||
torrents.append(TorrentInfo(_title, torrent_url))
|
||||
torrents.append(TorrentInfo(_title, torrent_url, homepage))
|
||||
else:
|
||||
torrents.append(TorrentInfo(_title, torrent_url))
|
||||
torrents.append(TorrentInfo(_title, torrent_url, homepage))
|
||||
return torrents
|
||||
|
||||
def get_poster(self, _url):
|
||||
content = self.get_html(_url).text
|
||||
def get_mikan_info(self, _url) -> tuple[str, str]:
|
||||
content = self.get_html(_url)
|
||||
soup = BeautifulSoup(content, "html.parser")
|
||||
div = soup.find("div", {"class": "bangumi-poster"})
|
||||
style = div.get("style")
|
||||
if style:
|
||||
return style.split("url('")[1].split("')")[0]
|
||||
return None
|
||||
poster_div = soup.find("div", {"class": "bangumi-poster"})
|
||||
poster_style = poster_div.get("style")
|
||||
official_title = soup.select_one('p.bangumi-title a[href^="/Home/Bangumi/"]').text
|
||||
if poster_style:
|
||||
poster_path = poster_style.split("url('")[1].split("')")[0]
|
||||
return poster_path, official_title
|
||||
return "", ""
|
||||
|
||||
def get_xml(self, _url) -> xml.etree.ElementTree.Element:
|
||||
return xml.etree.ElementTree.fromstring(self.get_url(_url).text)
|
||||
|
||||
@@ -47,13 +47,8 @@ class TitleParser:
|
||||
}
|
||||
title_search = episode.title_zh if episode.title_zh else episode.title_en
|
||||
title_raw = episode.title_en if episode.title_en else episode.title_zh
|
||||
if settings.rss_parser.enable_tmdb:
|
||||
official_title, _season = self.tmdb_parser(
|
||||
title_search, episode.season, language
|
||||
)
|
||||
else:
|
||||
official_title = titles[language] if titles[language] else titles["zh"]
|
||||
_season = episode.season
|
||||
official_title = titles[language] if titles[language] else titles["zh"]
|
||||
_season = episode.season
|
||||
data = BangumiData(
|
||||
id=_id,
|
||||
official_title=official_title,
|
||||
|
||||
@@ -2,39 +2,56 @@ import logging
|
||||
|
||||
from module.network import RequestContent
|
||||
from module.parser import TitleParser
|
||||
from module.models import Config
|
||||
from module.models import Config, BangumiData
|
||||
from module.database import DataOperator
|
||||
from module.core import DownloadClient
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class RSSAnalyser(DownloadClient):
|
||||
class RSSAnalyser:
|
||||
def __init__(self, settings: Config):
|
||||
super().__init__(settings)
|
||||
self._title_analyser = TitleParser()
|
||||
self.settings = settings
|
||||
|
||||
def rss_to_datas(self, rss_link: str):
|
||||
def rss_to_datas(self, rss_link: str) -> list[BangumiData]:
|
||||
with RequestContent() as req:
|
||||
rss_torrents = req.get_torrents(rss_link)
|
||||
title_dict = {torrent.name: rss_link for torrent in rss_torrents}
|
||||
title_dict = {torrent.name: torrent.homepage for torrent in rss_torrents}
|
||||
with DataOperator() as op:
|
||||
update_dict = op.match_list(title_dict)
|
||||
if not update_dict:
|
||||
new_dict = op.match_list(title_dict, rss_link)
|
||||
print(new_dict)
|
||||
if not new_dict:
|
||||
logger.debug("No new title found.")
|
||||
return
|
||||
return []
|
||||
_id = op.gen_id()
|
||||
for raw_title in add_title_list:
|
||||
data = self._title_analyser.raw_parser(
|
||||
raw=raw_title, _id=_id, settings=self.settings, rss_link=rss_link
|
||||
)
|
||||
if data is not None:
|
||||
op.insert(data)
|
||||
self.set_rule(data, rss_link)
|
||||
_id += 1
|
||||
new_data = []
|
||||
# New List
|
||||
with RequestContent() as req:
|
||||
for raw_title, homepage in new_dict.items():
|
||||
data = self._title_analyser.raw_parser(
|
||||
raw=raw_title, settings=self.settings, rss_link=rss_link, _id=_id
|
||||
)
|
||||
if data is not None:
|
||||
poster_link, official_title = req.get_mikan_info(homepage)
|
||||
data.poster_link = poster_link
|
||||
# Official title type
|
||||
if self.settings.rss_parser.parser_type == "mikan":
|
||||
data.official_title = official_title
|
||||
elif self.settings.rss_parser.parser_type == "tmdb":
|
||||
official_title, year, season = self._title_analyser.tmdb_parser()
|
||||
data.official_title = official_title
|
||||
data.year = year
|
||||
data.season = season
|
||||
else:
|
||||
pass
|
||||
new_data.append(data)
|
||||
_id += 1
|
||||
logger.debug(f"New title found: {data.official_title}")
|
||||
op.insert_list(new_data)
|
||||
return new_data
|
||||
|
||||
def rss_to_data(self, url, _filter: bool = True):
|
||||
def rss_to_data(self, url, _filter: bool = True) -> BangumiData:
|
||||
with RequestContent() as req:
|
||||
rss_torrents = req.get_torrents(url, _filter)
|
||||
for torrent in rss_torrents:
|
||||
@@ -42,7 +59,12 @@ class RSSAnalyser(DownloadClient):
|
||||
data = self._title_analyser.raw_parser(
|
||||
torrent.name, settings=self.settings, rss_link=url
|
||||
)
|
||||
self.set_rule(data, url)
|
||||
if data is not None:
|
||||
with DataOperator() as op:
|
||||
_id = op.gen_id()
|
||||
data.id = _id
|
||||
op.insert(data)
|
||||
return data
|
||||
except Exception as e:
|
||||
logger.debug(e)
|
||||
|
||||
@@ -58,5 +80,4 @@ if __name__ == '__main__':
|
||||
from module.conf import settings, setup_logger
|
||||
setup_logger(settings)
|
||||
link = "https://mikanani.me/RSS/MyBangumi?token=Td8ceWZZv3s2OZm5ji9RoMer8vk5VS3xzC1Hmg8A26E%3d"
|
||||
with RSSAnalyser(settings) as analyser:
|
||||
analyser.rss_to_datas(link)
|
||||
data = RSSAnalyser(settings).rss_to_datas(link)
|
||||
|
||||
Reference in New Issue
Block a user