优化数据库操作,完善RSS解析器逻辑

This commit is contained in:
EstrellaXD
2023-05-09 09:49:13 +08:00
parent afd8e343ba
commit 328ab56a21
7 changed files with 95 additions and 61 deletions

View File

@@ -45,7 +45,7 @@ class DownloadClient:
prefs = self.client.get_app_prefs()
self.download_path = os.path.join(prefs["save_path"], "Bangumi")
def set_rule(self, info: BangumiData, rss_link):
def set_rule(self, info: BangumiData):
official_name, raw_name, season, group = (
info.official_title,
info.title_raw,
@@ -60,7 +60,7 @@ class DownloadClient:
"episodeFilter": "",
"smartFilter": False,
"previouslyMatchedEpisodes": [],
"affectedFeeds": [rss_link],
"affectedFeeds": info.rss_link,
"ignoreDays": 0,
"lastMatch": "",
"addPaused": False,

View File

@@ -27,7 +27,9 @@ class DataConnector:
eps_collect INTEGER NOT NULL,
offset INTEGER NOT NULL,
filter TEXT NOT NULL,
rss_link TEXT NOT NULL
rss_link TEXT NOT NULL,
poster_link TEXT,
added INTEGER NOT NULL
);
"""
)

View File

@@ -21,7 +21,7 @@ class DataOperator(DataConnector):
def db_to_data(db_data: dict) -> BangumiData:
for key, item in db_data.items():
if isinstance(item, int):
if key not in ["id", "offset", "season"]:
if key not in ["id", "offset", "season", "year"]:
db_data[key] = bool(item)
elif key in ["filter", "rss_link"]:
db_data[key] = item.split(",")
@@ -45,7 +45,9 @@ class DataOperator(DataConnector):
eps_collect,
offset,
filter,
rss_link
rss_link,
poster_link,
added
) VALUES (
:id,
:official_title,
@@ -60,7 +62,9 @@ class DataOperator(DataConnector):
:eps_collect,
:offset,
:filter,
:rss_link
:rss_link,
:poster_link,
:added
)
""",
db_data,
@@ -86,7 +90,9 @@ class DataOperator(DataConnector):
eps_collect,
offset,
filter,
rss_link
rss_link,
poster_link,
added
) VALUES (
:id,
:official_title,
@@ -101,7 +107,9 @@ class DataOperator(DataConnector):
:eps_collect,
:offset,
:filter,
:rss_link
:rss_link,
:poster_link,
:added
)
""",
db_data,
@@ -125,7 +133,9 @@ class DataOperator(DataConnector):
eps_collect = :eps_collect,
offset = :offset,
filter = :filter,
rss_link = :rss_link
rss_link = :rss_link,
poster_link = :poster_link,
added = :added
WHERE id = :id
""",
db_data,
@@ -133,18 +143,18 @@ class DataOperator(DataConnector):
self._conn.commit()
return self._cursor.rowcount == 1
def update_rss(self, title_raw, rss_set: list[str]):
def update_column(self, title_raw: str, column: str, value: str):
def update_rss(self, title_raw, rss_set: str):
# Update rss and select all data
self._cursor.execute(
"""
UPDATE bangumi SET
rss_link = :rss_link
WHERE title_raw = :title_raw
UPDATE bangumi SET rss_link = :rss_link WHERE title_raw = :title_raw
""",
{"rss_link": ",".join(rss_set), "title_raw": title_raw},
{"rss_link": rss_set, "title_raw": title_raw},
)
self._conn.commit()
logger.info(f"Update {title_raw} rss_link to {rss_set}.")
return self._cursor.rowcount == 1
def search_id(self, _id: int) -> BangumiData | None:
self._cursor.execute(
@@ -194,7 +204,7 @@ class DataOperator(DataConnector):
)
return self._cursor.fetchone() is not None
def match_list(self, title_dict: dict) -> dict:
def match_list(self, title_dict: dict, rss_link: str) -> dict:
# Match title_raw in database
self._cursor.execute(
"""
@@ -203,15 +213,17 @@ class DataOperator(DataConnector):
)
data = self._cursor.fetchall()
if not data:
return {}
return title_dict
# Match title
for title, rss_link in title_dict.items():
for title in title_dict.copy().keys():
for title_raw, rss_set in data:
if rss_link in rss_set and title_raw in title:
del title_dict[title]
elif rss_link not in rss_set and title_raw in title:
# TODO: Logic problem
break
if title_raw in title:
if rss_link in rss_set:
title_dict.pop(title)
else:
rss_set += "," + rss_link
self.update_rss(title_raw, rss_set)
break
return title_dict
def not_exist_titles(self, titles: list[str], rss_link) -> list[str]:

View File

@@ -17,7 +17,8 @@ class BangumiData(BaseModel):
offset: int = Field(0, alias="offset", title="番剧偏移量")
filter: list[str] = Field(..., alias="filter", title="番剧过滤器")
rss_link: list[str] = Field(..., alias="rss_link", title="番剧RSS链接")
# poster_link: str | None = Field(None, alias="poster_link", title="番剧海报链接")
poster_link: str | None = Field(None, alias="poster_link", title="番剧海报链接")
added: bool = Field(False, alias="added", title="是否已添加")
class Notification(BaseModel):

View File

@@ -13,6 +13,7 @@ FILTER = "|".join(settings.rss_parser.filter)
class TorrentInfo:
name: str
torrent_link: str
homepage: str = None
class RequestContent(RequestURL):
@@ -29,22 +30,24 @@ class RequestContent(RequestURL):
torrent_homepage.append(item.find("link").text)
torrents = []
for _title, torrent_url in zip(torrent_titles, torrent_urls):
for _title, torrent_url, homepage in zip(torrent_titles, torrent_urls, torrent_homepage):
if _filter:
if re.search(FILTER, _title) is None:
torrents.append(TorrentInfo(_title, torrent_url))
torrents.append(TorrentInfo(_title, torrent_url, homepage))
else:
torrents.append(TorrentInfo(_title, torrent_url))
torrents.append(TorrentInfo(_title, torrent_url, homepage))
return torrents
def get_poster(self, _url):
content = self.get_html(_url).text
def get_mikan_info(self, _url) -> tuple[str, str]:
content = self.get_html(_url)
soup = BeautifulSoup(content, "html.parser")
div = soup.find("div", {"class": "bangumi-poster"})
style = div.get("style")
if style:
return style.split("url('")[1].split("')")[0]
return None
poster_div = soup.find("div", {"class": "bangumi-poster"})
poster_style = poster_div.get("style")
official_title = soup.select_one('p.bangumi-title a[href^="/Home/Bangumi/"]').text
if poster_style:
poster_path = poster_style.split("url('")[1].split("')")[0]
return poster_path, official_title
return "", ""
def get_xml(self, _url) -> xml.etree.ElementTree.Element:
    """Fetch ``_url`` and parse the response text as XML.

    Args:
        _url: Address passed unchanged to ``self.get_url``.

    Returns:
        The element produced by ``xml.etree.ElementTree.fromstring`` from
        the ``.text`` attribute of the fetched response.
    """
    response = self.get_url(_url)
    return xml.etree.ElementTree.fromstring(response.text)

View File

@@ -47,13 +47,8 @@ class TitleParser:
}
title_search = episode.title_zh if episode.title_zh else episode.title_en
title_raw = episode.title_en if episode.title_en else episode.title_zh
if settings.rss_parser.enable_tmdb:
official_title, _season = self.tmdb_parser(
title_search, episode.season, language
)
else:
official_title = titles[language] if titles[language] else titles["zh"]
_season = episode.season
official_title = titles[language] if titles[language] else titles["zh"]
_season = episode.season
data = BangumiData(
id=_id,
official_title=official_title,

View File

@@ -2,39 +2,56 @@ import logging
from module.network import RequestContent
from module.parser import TitleParser
from module.models import Config
from module.models import Config, BangumiData
from module.database import DataOperator
from module.core import DownloadClient
logger = logging.getLogger(__name__)
class RSSAnalyser(DownloadClient):
class RSSAnalyser:
def __init__(self, settings: Config):
super().__init__(settings)
self._title_analyser = TitleParser()
self.settings = settings
def rss_to_datas(self, rss_link: str):
def rss_to_datas(self, rss_link: str) -> list[BangumiData]:
with RequestContent() as req:
rss_torrents = req.get_torrents(rss_link)
title_dict = {torrent.name: rss_link for torrent in rss_torrents}
title_dict = {torrent.name: torrent.homepage for torrent in rss_torrents}
with DataOperator() as op:
update_dict = op.match_list(title_dict)
if not update_dict:
new_dict = op.match_list(title_dict, rss_link)
print(new_dict)
if not new_dict:
logger.debug("No new title found.")
return
return []
_id = op.gen_id()
for raw_title in add_title_list:
data = self._title_analyser.raw_parser(
raw=raw_title, _id=_id, settings=self.settings, rss_link=rss_link
)
if data is not None:
op.insert(data)
self.set_rule(data, rss_link)
_id += 1
new_data = []
# New List
with RequestContent() as req:
for raw_title, homepage in new_dict.items():
data = self._title_analyser.raw_parser(
raw=raw_title, settings=self.settings, rss_link=rss_link, _id=_id
)
if data is not None:
poster_link, official_title = req.get_mikan_info(homepage)
data.poster_link = poster_link
# Official title type
if self.settings.rss_parser.parser_type == "mikan":
data.official_title = official_title
elif self.settings.rss_parser.parser_type == "tmdb":
official_title, year, season = self._title_analyser.tmdb_parser()
data.official_title = official_title
data.year = year
data.season = season
else:
pass
new_data.append(data)
_id += 1
logger.debug(f"New title found: {data.official_title}")
op.insert_list(new_data)
return new_data
def rss_to_data(self, url, _filter: bool = True):
def rss_to_data(self, url, _filter: bool = True) -> BangumiData:
with RequestContent() as req:
rss_torrents = req.get_torrents(url, _filter)
for torrent in rss_torrents:
@@ -42,7 +59,12 @@ class RSSAnalyser(DownloadClient):
data = self._title_analyser.raw_parser(
torrent.name, settings=self.settings, rss_link=url
)
self.set_rule(data, url)
if data is not None:
with DataOperator() as op:
_id = op.gen_id()
data.id = _id
op.insert(data)
return data
except Exception as e:
logger.debug(e)
@@ -58,5 +80,4 @@ if __name__ == '__main__':
from module.conf import settings, setup_logger
setup_logger(settings)
link = "https://mikanani.me/RSS/MyBangumi?token=Td8ceWZZv3s2OZm5ji9RoMer8vk5VS3xzC1Hmg8A26E%3d"
with RSSAnalyser(settings) as analyser:
analyser.rss_to_datas(link)
data = RSSAnalyser(settings).rss_to_datas(link)