2.5.0-beta2

This commit is contained in:
EstrellaXD
2022-06-14 21:49:51 +08:00
parent 85899517b9
commit 9bcefd76cb
31 changed files with 266 additions and 231 deletions

2
.gitignore vendored
View File

@@ -167,3 +167,5 @@ cython_debug/
/auto_bangumi/parser/analyser/tmdb.py

2
.idea/misc.xml generated
View File

@@ -1,4 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (np_veclib)" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (auto_bangumi)" project-jdk-type="Python SDK" />
</project>

View File

@@ -13,4 +13,4 @@ ADD ./auto_bangumi /auto_bangumi
ADD ./config /config
ADD ./templates /templates
CMD [ "run.sh"]
CMD ["python3", "app.py"]

View File

@@ -1,4 +1,5 @@
import app
if __name__ == "__main__":
app.run()

View File

@@ -2,15 +2,14 @@ import os
import time
import logging
from conf import settings
from conf.conf import settings
from conf.argument_parser import parse
from conf.log import setup_logger
from utils import json_config
from mikanani.rss_collector import RSSCollector
from core.rss_analyser import RSSAnalyser
from core.download_client import DownloadClient
from core.renamer import Renamer
from network.request import RequestsURL
logger = logging.getLogger(__name__)
@@ -55,6 +54,9 @@ def show_info():
logger.info("Starting AutoBangumi...")
def run():
# DEBUG 模式初始化
args = parse()
@@ -71,40 +73,18 @@ def run():
show_info()
time.sleep(3)
download_client = DownloadClient()
url_request = RequestsURL()
download_client.init_downloader()
if settings.rss_link is None:
logger.error("Please add RIGHT RSS url.")
quit()
download_client.rss_feed()
rss_collector = RSSCollector(url_request)
rss_analyser = RSSAnalyser()
rename = Renamer(download_client)
# 主程序循环
while True:
bangumi_data = load_data_file()
try:
# 解析 RSS
rss_collector.collect(bangumi_data)
# 历史剧集收集
if settings.enable_eps_complete:
download_client.eps_collect(bangumi_data["bangumi_info"], url_request)
url_request.close()
download_client.add_rules(bangumi_data["bangumi_info"], settings.rss_link)
# 首次等待
if bangumi_data["first_run"]:
logger.info(f"Waiting for downloading torrents...")
time.sleep(settings.first_sleep)
bangumi_data["first_run"] = False
save_data_file(bangumi_data)
# rename
if settings.method != "none":
rename.refresh()
rename.run()
time.sleep(settings.sleep_time)
except Exception as e:
if args.debug:
raise e
logger.exception(e)
bangumi_data = json_config.load(settings.info_path)
rss_analyser.rss_to_data(bangumi_data["bangumi_info"])
download_client.add_rules(bangumi_data["bangumi_info"])
if __name__ == "__main__":

View File

@@ -1,39 +0,0 @@
import logging
logger = logging.getLogger(__name__)
from preprocessor import Preprocessor
from token_generator import TokenGenerator
from analyser import Analyser
class Parser:
    """Pipeline facade over the three parsing stages.

    Feeds a raw release name through preprocessing, tokenization, and
    analysis, yielding a structured episode description.
    """

    def __init__(self) -> None:
        self._preprocessor = Preprocessor()
        self._token_generator = TokenGenerator()
        self._analyser = Analyser()

    def parse(self, name: str):
        """Parse a raw release name.

        Returns a ``(episode, tokens, name)`` tuple: the analysed episode,
        the token list it was derived from, and the preprocessed name
        (note: the input ``name`` is rebound to its preprocessed form).
        """
        name = self._preprocessor.preprocess(name)
        tokens = self._token_generator.generate(name)
        episode = self._analyser.analyse(name, tokens)
        return episode, tokens, name
if __name__ == "__main__":
import sys, os
sys.path.append(os.path.dirname(".."))
from conf.log import setup_logger
from conf.const import BCOLORS
setup_logger()
parser = Parser()
with (open("parser/names.txt", "r", encoding="utf-8") as f):
for name in f:
if name != "":
episode, tokens, name = parser.parse(name)
if len(tokens) == 1:
logger.debug(f"{BCOLORS._(BCOLORS.HEADER, name)}")
logger.debug(f"{BCOLORS._(BCOLORS.OKGREEN,tokens)}")
logger.debug(f"{BCOLORS._(BCOLORS.WARNING,episode)}")

View File

@@ -1,5 +1,5 @@
import os
import const
from conf import const
class Settings(dict):

View File

@@ -1,7 +1,7 @@
# -*- encoding: utf-8 -*-
DEFAULT_SETTINGS = {
"version": "2.5.0-beta1",
"version": "2.5.0-beta2",
"data_version": 4.0,
"host_ip": "localhost:8080",
"sleep_time": 1800,

View File

@@ -1,15 +1,11 @@
import re
import logging
import os
import time
from downloader import getClient
from downloader.exceptions import ConflictError
from conf import settings
from mikanani.eps_complete import FullSeasonGet
from conf.conf import settings
logger = logging.getLogger(__name__)
@@ -73,7 +69,7 @@ class DownloadClient:
self.client.rss_add_feed(url=rss_link, item_path=item_path)
logger.info("Add RSS Feed successfully.")
def add_rules(self, bangumi_info, rss_link):
def add_rules(self, bangumi_info, rss_link=settings.rss_link):
logger.info("Start adding rules.")
for info in bangumi_info:
if not info["added"]:
@@ -81,28 +77,6 @@ class DownloadClient:
info["added"] = True
logger.info("Finished.")
def eps_collect(self, bangumi_info, request):
logger.info("Start collecting past episodes.")
for info in bangumi_info:
if info["download_past"]:
downloads = FullSeasonGet(
info["group"],
info["title"],
info["season"],
info["subtitle"],
info["source"],
info["dpi"],
request
).add_torrents_info()
for download in downloads:
self.client.torrents_add(
urls=download["url"],
save_path=download["save_path"],
category="Bangumi"
)
time.sleep(settings.connect_retry_interval)
info["download_past"] = False
def get_torrent_info(self):
return self.client.torrents_info(
status_filter="completed", category="Bangumi"
@@ -114,12 +88,19 @@ class DownloadClient:
)
logger.info(f"{path_name} >> {new_name}")
def delete_torrent(self, hash):
def delete_torrent(self, hashes):
self.client.torrents_delete(
hash
hashes
)
logger.info(f"Remove bad torrents.")
def add_torrent(self, torrent: dict):
self.client.torrents_add(
urls=torrent["url"],
save_path=torrent["save_path"],
category="Bangumi"
)
if __name__ == "__main__":
put = DownloadClient()

View File

@@ -3,7 +3,7 @@ import logging
from bs4 import BeautifulSoup
from conf import settings
from conf.conf import settings
from utils import json_config
logger = logging.getLogger(__name__)

View File

@@ -0,0 +1,50 @@
import os.path
import re
import logging
from conf.conf import settings
from network.rss_collector import GetRssInfo
from core.download_client import DownloadClient
logger = logging.getLogger(__name__)
class FullSeasonGet:
    """Back-catalogue collector.

    For each bangumi entry flagged for episode completion, queries
    Mikanani's search RSS for the whole season and queues every torrent
    on the download client.
    """

    def __init__(self):
        # RSS fetch helper (network.rss_collector.GetRssInfo).
        self._get_rss = GetRssInfo()

    def init_eps_complete_search_str(self, data: dict) -> str:
        """Build the search keyword for one bangumi entry.

        Joins group/title/season/subtitle/source/dpi and replaces every
        non-word character (and underscore) with ``+``, which doubles as
        the URL query-string separator.
        """
        search_str = re.sub(
            r"[\W_]", "+",
            f"{data['group']} {data['title_raw']} {data['season_raw']} {data['subtitle']} {data['source']} {data['dpi']}")
        return search_str

    def get_season_torrents(self, data: dict):
        """Return the torrent URLs found by a Mikanani RSS search."""
        keyword = self.init_eps_complete_search_str(data)
        # GetRssInfo.get_torrents() yields the <enclosure> "url" strings.
        torrents = self._get_rss.get_torrents(f"https://mikanani.me/RSS/Search?str={keyword}")
        return torrents

    def collect_season_torrents(self, data: dict):
        """Pair each season torrent URL with its destination save path."""
        torrents = self.get_season_torrents(data)
        downloads = []
        for url in torrents:
            download_info = {
                # BUGFIX: get_torrents() already extracts torrent["url"]
                # and returns plain URL strings, so indexing the string
                # with ["url"] raised TypeError. Use the string directly.
                "url": url,
                "save_path": os.path.join(
                    settings.download_path,
                    data["official_title"],
                    f"Season {data['season']}")
            }
            downloads.append(download_info)
        return downloads

    def eps_complete(self, bangumi_info, download_client: DownloadClient):
        """Queue past episodes for every entry whose flag is set.

        Clears the flag in place after queuing so the work is done once.
        NOTE(review): TitleParser.return_dict() emits the key
        "eps_collect", not "eps_complete" — confirm which key callers
        actually store before relying on this lookup.
        """
        for data in bangumi_info:
            if data["eps_complete"]:
                logger.info(f"Start collecting past episodes of {data['official_title']} Season {data['season']}...")
                downloads = self.collect_season_torrents(data)
                for download in downloads:
                    download_client.add_torrent(download)
                logger.info("Completed!")
                data["eps_complete"] = False

View File

@@ -0,0 +1,43 @@
import re
import logging
from network.rss_collector import GetRssInfo
from parser.parser_api import TitleParser
from conf.conf import settings
from core.download_client import DownloadClient
logger = logging.getLogger(__name__)
class RSSAnalyser:
    """Refresh bangumi metadata from the configured RSS feed."""

    def __init__(self):
        self._title_analyser = TitleParser()
        self._get_rss = GetRssInfo()

    def rss_to_data(self, bangumi_info: list):
        """Append a parsed entry for every feed title not already known.

        ``bangumi_info`` (a list of dicts from TitleParser.return_dict)
        is mutated in place.
        """
        rss_titles = self._get_rss.get_titles(settings.rss_link)
        for raw_title in rss_titles:
            logger.info(raw_title)
            extra_add = True
            for d in bangumi_info:
                # NOTE(review): title_raw is used as a regex pattern, so
                # titles containing metacharacters ("[", "(", ...) may
                # misbehave — consider re.escape(); behavior left as-is.
                if re.search(d["title_raw"], raw_title) is not None:
                    extra_add = False
                    break
            if extra_add:
                data = self._title_analyser.return_dict(raw_title)
                # BUGFIX: bangumi_info holds dicts, so testing the title
                # string with `not in bangumi_info` was always True and
                # the duplicate guard never fired; compare titles.
                known_titles = [d["official_title"] for d in bangumi_info]
                if data["official_title"] not in known_titles:
                    bangumi_info.append(data)

    def run(self, bangumi_info: list, download_client: DownloadClient):
        """Refresh bangumi_info from RSS, then register download rules."""
        self.rss_to_data(bangumi_info)
        download_client.add_rules(bangumi_info)


if __name__ == "__main__":
    # Ad-hoc manual check against the dev settings.
    from conf.const_dev import DEV_SETTINGS
    settings.init(DEV_SETTINGS)
    ra = RSSAnalyser()
    data = []
    ra.rss_to_data(data)
    print(data)

View File

@@ -1,9 +1,10 @@
from conf import settings
from conf.conf import settings
def getClient():
host=settings.host_ip
username=settings.user_name
password=settings.password
host = settings.host_ip
username = settings.user_name
password = settings.password
# TODO 多下载器支持
# 从 settings 里读取下载器名称,然后返回对应 Client
from downloader.qb_downloader import QbDownloader

View File

@@ -1,49 +0,0 @@
import os.path
import re
import requests
from bs4 import BeautifulSoup
import logging
from conf import settings
from network.request import RequestsURL
logger = logging.getLogger(__name__)
class FullSeasonGet:
def __init__(self, group, bangumi_name, season, sub, source, dpi , request: RequestsURL):
self.bangumi_name = re.sub(settings.rule_name_re, " ", bangumi_name).strip()
self.group = "" if group is None else group
self.season = season
self.subtitle = "" if sub is None else sub
self.source = "" if source is None else source
self.dpi = dpi
self._req = request
def get_season_rss(self):
if self.season == "S01":
season = ""
else:
season = self.season
search_str = re.sub(r"[\W_]", "+",
f"{self.group} {self.bangumi_name} {season} {self.subtitle} {self.source} {self.dpi}")
season = self._req.get_url(search_str)
torrents = season.find_all("enclosure")
return torrents
def add_torrents_info(self):
torrents = self.get_season_rss()
downloads = []
for torrent in torrents:
download_info = {
"url": torrent["url"],
"save_path": os.path.join(
settings.download_path,
self.bangumi_name,
self.season)
}
downloads.append(download_info)
return downloads

View File

@@ -1,19 +1,19 @@
# -*- coding: UTF-8 -*-
import logging
from conf import settings
from bangumi_parser.analyser.rss_parser import ParserLV2
from bangumi_parser.fuzz_match import FuzzMatch
from network.request import RequestsURL
from bs4 import BeautifulSoup
from conf.conf import settings
from parser.analyser.raw_parser import ParserLV2
from parser.fuzz_match import FuzzMatch
logger = logging.getLogger(__name__)
class RSSCollector:
def __init__(self, request: RequestsURL):
def __init__(self):
self._simple_analyser = ParserLV2()
self._fuzz_match = FuzzMatch()
self._req = request
def title_parser(self, title, fuzz_match=True):
episode = self._simple_analyser.analyse(title)
@@ -42,9 +42,8 @@ class RSSCollector:
}
return episode, data, title_official
def collect(self, bangumi_data):
req = self._req.get_url(settings.rss_link)
items = req.find_all("item")
def collect(self, bangumi_data, rss_data: BeautifulSoup):
items = rss_data.find_all("item")
for item in items:
add = True
name = item.title.string
@@ -59,9 +58,8 @@ class RSSCollector:
bangumi_data["bangumi_info"].append(data)
logger.info(f"Adding {title_official} Season {episode.season_info.number}")
def collect_collection(self, rss_link):
req = self._req.get_url(rss_link)
item = req.find("item")
def collect_collection(self, rss_data: BeautifulSoup):
item = rss_data.find("item")
title = item.title.string
_, data, _ = self.title_parser(title, fuzz_match=False)
return data

View File

@@ -5,42 +5,46 @@ import logging
from bs4 import BeautifulSoup
from conf import settings
from conf.conf import settings
logger = logging.getLogger(__name__)
class RequestsURL:
class RequestURL:
def __init__(self):
self.session = requests.session()
if settings.http_proxy is not None:
self.proxy = {
"https": settings.http_proxy,
"http": settings.http_proxy
"http": settings.http_proxy,
"socks": settings.http_proxy
}
else:
self.proxy = None
self.header = {
"user-agent": "Mozilla/5.0",
"Accept": "application/xml"
}
def get_url(self, url):
times = 1
times = 0
while times < 5:
try:
req = self.session.get(url, proxies=self.proxy)
return BeautifulSoup(req.text, "xml")
except Exception:
# logger.exception(e)
req = self.session.get(url=url, headers=self.header, proxies=self.proxy)
return req
except Exception as e:
logger.debug(e)
logger.error("ERROR with DNS/Connection.")
time.sleep(settings.connect_retry_interval)
times += 1
def get_content(self, url, content="xml"):
if content == "xml":
return BeautifulSoup(self.get_url(url).text, content)
elif content == "json":
return self.get_url(url).json()
def close(self):
self.session.close()
if __name__ == "__main__":
network_req = RequestsURL()
req = network_req.get_url("https://mikanani.me/RSS/Classic")
print(req.find_all("item"))
network_req.close()
req = network_req.get_url("https://mikanani.me/RSS/Classic")
print(req.find_all("item"))

View File

@@ -0,0 +1,35 @@
import time
from network.request import RequestURL
class GetRssInfo:
    """Thin helpers over RequestURL for reading Mikanani RSS feeds.

    NOTE(review): RequestURL.get_content() appears to return None after
    exhausting its retries, which would make the `soup` calls below raise
    AttributeError — confirm the failure contract.
    """

    def __init__(self):
        self._req = RequestURL()

    # Mikanani RSS helpers.
    def get_titles(self, url):
        """Return the <title> text of every <item> in the feed."""
        soup = self._req.get_content(url)
        items = soup.find_all("item")
        # Pause between requests — presumably to rate-limit the server.
        time.sleep(1)
        return [item.title.string for item in items]

    def get_title(self, url):
        """Return the <title> tag of the first <item>.

        NOTE(review): unlike get_titles() this returns the tag object,
        not its .string — confirm callers expect that.
        """
        soup = self._req.get_content(url)
        item = soup.find("item")
        time.sleep(1)
        return item.title

    def get_torrents(self, url):
        """Return the URL string of every <enclosure> in the feed."""
        soup = self._req.get_content(url)
        enclosure = soup.find_all("enclosure")
        time.sleep(1)
        return [t["url"] for t in enclosure]


if __name__ == "__main__":
    # Manual smoke test: an unresolvable host should fail gracefully.
    rss = GetRssInfo()
    try:
        rss.get_title("https://adsasd.com")
    except Exception:
        print("connect failed")

View File

@@ -7,7 +7,7 @@ import logging
from mikanani.rss_collector import RSSCollector
from core.download_client import DownloadClient
from conf import settings
from conf.conf import settings
from utils import json_config
logger = logging.getLogger(__name__)

View File

@@ -1,4 +1,4 @@
from bangumi_parser.episode import Episode
from parser.episode import Episode
class Analyser():
def analyse(self,name, tokens) -> Episode:

View File

@@ -1,13 +1,12 @@
import logging
import re
from utils import json_config
from conf import settings
from bangumi_parser.episode import Episode
from conf.conf import settings
from parser.episode import Episode
logger = logging.getLogger(__name__)
class ParserLV2:
class RawParser:
def __init__(self) -> None:
self._info = Episode()
@@ -30,39 +29,15 @@ class ParserLV2:
@staticmethod
def season_process(name_season):
season_rule = r"S\d{1,2}|Season \d{1,2}|[第].[季期]"
season_map = {
"": 1,
"": 2,
"": 3,
"": 4,
"": 5,
"": 6,
"": 7,
"": 8,
"": 9,
"": 10,
}
name_season = re.sub(r"[\[\]]", " ", name_season)
seasons = re.findall(season_rule, name_season)
if not seasons:
name = name_season
season_number = 1
season_raw = "S01" if settings.season_one_tag else ""
season_raw = ""
else:
name = re.sub(season_rule, "", name_season)
for season in seasons:
season_raw = season
if re.search(r"S|Season", season) is not None:
season_number = int(re.sub(r"S|Season", "", season))
break
elif re.search(r"[第 ].*[季期]", season) is not None:
season_pro = re.sub(r"[第季期 ]", "", season)
try:
season_number = int(season_pro)
except ValueError:
season_number = season_map[season_pro]
break
return name, season_number, season_raw
season_raw = seasons[0]
return name, season_raw
@staticmethod
def name_process(name):
@@ -114,16 +89,16 @@ class ParserLV2:
raw_name,
)
name_season = self.second_process(match_obj.group(1))
name, season_number, season_raw = self.season_process(name_season)
name, season_raw = self.season_process(name_season)
name, name_group = self.name_process(name)
episode = int(re.findall(r"\d{1,3}", match_obj.group(2))[0])
other = match_obj.group(3).strip()
sub, dpi, source= self.find_tags(other)
return name, season_number, season_raw, episode, sub, dpi, source, name_group
return name, season_raw, episode, sub, dpi, source, name_group
def analyse(self, raw) -> Episode:
try:
self._info.title, self._info.season_info.number,\
self._info.title,\
self._info.season_info.raw, self._info.ep_info.number,\
self._info.subtitle, self._info.dpi, self._info.source, \
self._info.title_info.group = self.process(raw)
@@ -133,6 +108,6 @@ class ParserLV2:
if __name__ == "__main__":
test = ParserLV2()
ep = test.analyse("【幻樱字幕组】【4月新番】【古见同学有交流障碍症 Komi-san wa, Komyushou Desu.】【22】【GB_MP4】【1920X1080】")
print(ep.title)
test = RawParser()
ep = test.analyse("【幻樱字幕组】【4月新番】【古见同学有交流障碍症 第二季 Komi-san wa, Komyushou Desu. S02】【22】【GB_MP4】【1920X1080】")
print(ep.season_info.raw)

View File

@@ -5,7 +5,8 @@ from os import path
logger = logging.getLogger(__name__)
class EPParser:
class DownloadEPParser:
def __init__(self):
self.rules = [
r"(.*)\[(\d{1,3}|\d{1,3}\.\d{1,2})(?:v\d{1,2})?(?:END)?\](.*)",
@@ -37,8 +38,13 @@ class EPParser:
)
return new_name
def rename_none(self, name):
return name
def download_rename(self, name, method):
if method.lower() == "pn":
return self.rename_pn(name)
elif method.lower() == "normal":
return self.rename_normal(name)
elif method.lower() == "none":
return name
if __name__ == "__main__":

View File

@@ -1,7 +1,7 @@
import re
import logging
import requests
from conf import settings
from conf.conf import settings
from utils import json_config
from bangumi_parser.episode import Episode

View File

@@ -1,7 +1,7 @@
from thefuzz import fuzz
import logging
from utils import json_config
from conf import settings
from conf.conf import settings
logger = logging.getLogger(__name__)

View File

@@ -0,0 +1,51 @@
import logging
from parser.analyser.raw_parser import RawParser
from parser.analyser.rename_parser import DownloadEPParser
from parser.analyser.tmdb import TMDBinfo
from conf.conf import settings
logger = logging.getLogger(__name__)
class TitleParser:
def __init__(self):
self._raw_parser = RawParser()
self._download_parser = DownloadEPParser()
self.tmdb = TMDBinfo
def raw_parser(self, raw):
return self._raw_parser.analyse(raw)
def download_parser(self, download_raw, method=settings.method):
return self._download_parser.download_rename(download_raw, method)
def return_dict(self, raw):
episode = self.raw_parser(raw)
try:
tmdb_info = self.tmdb.tmdb_search(episode.title)
official_title = tmdb_info.title_jp
season = tmdb_info.last_season
except Exception as e:
logger.debug(e)
logger.info("No data in TMDB")
official_title = episode.title
season = episode.season_info.raw
return {
"official_title": official_title,
"title_raw": episode.title,
"season": season,
"season_raw": episode.season_info.raw,
"group": episode.group,
"dpi": episode.dpi,
"source": episode.source,
"subtitle": episode.subtitle,
"added": False,
"eps_collect": True if settings.eps_complete else False,
}
if __name__ == "__main__":
raw = "[离谱Sub] 朋友游戏 / トモダチゲーム / Tomodachi Game [10][AVC AAC][1080p][简体内嵌] [401.7MB]"
p = TitleParser()
print(p.return_dict(raw))

View File

@@ -1,4 +0,0 @@
#!/bin/bash
exec python3 app.py -d&
exec python3 web.py