Merge pull request #24 from findix/main

refactor and minor bug fix
This commit is contained in:
Estrella Pan
2022-06-02 10:00:30 +08:00
committed by GitHub
16 changed files with 863 additions and 305 deletions

163
.gitignore vendored Normal file
View File

@@ -0,0 +1,163 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# Custom
/AutoBangumi/config/bangumi.json

80
.idea/.gitignore generated vendored
View File

@@ -1,3 +1,77 @@
# Default ignored files
/shelf/
/workspace.xml
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
# AWS User-specific
.idea/**/aws.xml
# Generated files
.idea/**/contentModel.xml
# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
# Gradle
.idea/**/gradle.xml
.idea/**/libraries
# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr
# CMake
cmake-build-*/
# Mongo Explorer plugin
.idea/**/mongoSettings.xml
# File-based project format
*.iws
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# SonarLint plugin
.idea/sonarlint/
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
# Editor-based Rest Client
.idea/httpRequests
# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser

17
.vscode/launch.json vendored Normal file
View File

@@ -0,0 +1,17 @@
{
// 使用 IntelliSense 了解相关属性。
// 悬停以查看现有属性的描述。
// 欲了解更多信息,请访问: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python: docker_main",
"type": "python",
"request": "launch",
"program": "${workspaceFolder}/AutoBangumi/app/docker_main.py",
"cwd": "${workspaceFolder}/AutoBangumi/app",
"console": "integratedTerminal",
"justMyCode": true
}
]
}

3
.vscode/settings.json vendored Normal file
View File

@@ -0,0 +1,3 @@
{
"python.formatting.provider": "black"
}

View File

@@ -4,6 +4,20 @@ import zhconv
import logging
from RssFilter.fliter_base import *
logger = logging.getLogger(__name__)
handler = logging.FileHandler(
filename="RssFilter/rename_log.txt",
mode="w",
encoding="utf-8"
)
handler.setFormatter(
logging.Formatter(
"%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"
)
)
logger.level = logging.WARNING
logger.addHandler(handler)
class RSSInfoCleaner:
class Name:
@@ -32,20 +46,72 @@ class RSSInfoCleaner:
self.file_name = file_name
self.Name.raw = file_name # 接收文件名参数
self.clean() # 清理广告等杂质
# 加载日志,匹配特征等
logging.basicConfig(level=logging.WARN,
filename='RssFilter/rename_log.txt',
filemode='w',
format='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s')
self.group_character = ['字幕社', '字幕组', '字幕屋', '发布组', "连载", '动漫', '国漫', '汉化', 'raw', 'works', '工作室', '压制', '合成',
'制作', '搬运', '委员会', '家族', '译制', '动画', '研究所', 'sub', '翻译', '联盟', 'dream', '-rip', 'neo',
'team', "百合组", "慕留人", "行动组"]
self.group_char = ['dmhy', '澄空学园', 'c.c动漫', "vcb", 'amor', 'moozzi2', 'skytree', 'sweetsub', 'pcsub', 'ahu-sub',
'f宅', 'captions', 'dragsterps', 'onestar', "lolihouse", "天空树", "妇联奶子", "不够热", "烤肉同好", '卡通',
'时雨初空', 'nyaa', 'ddd', 'koten', 'reinforce', '届恋对邦小队', 'cxraw', "witex.io"]
with open("../config/clean_rule.json", encoding='utf-8') as file_obj:
# 匹配特征等
self.group_character = [
"字幕社",
"字幕组",
"字幕屋",
"发布",
"连载组",
"动漫",
"国漫",
"汉化",
"raw",
"works",
"工作室",
"压制",
"合成",
"制作",
"搬运",
"委员会",
"家族",
"译制",
"动画",
"研究所",
"sub",
"翻译",
"联盟",
"dream",
"-rip",
"neo",
"team",
"百合组",
"慕留人",
"行动组",
]
self.group_char = [
"dmhy",
"澄空学园",
"c.c动漫",
"vcb",
"amor",
"moozzi2",
"skytree",
"sweetsub",
"pcsub",
"ahu-sub",
"f宅",
"captions",
"dragsterps",
"onestar",
"lolihouse",
"天空树",
"妇联奶子",
"不够热",
"烤肉同好",
"卡通",
"时雨初空",
"nyaa",
"ddd",
"koten",
"reinforce",
"届恋对邦小队",
"cxraw",
"witex.io",
]
with open("../config/clean_rule.json", encoding="utf-8") as file_obj:
rule_json = json.load(file_obj)[0]["group_name"]
self.group_rule = [zhconv.convert(x, 'zh-cn') for x in rule_json]
self.group_rule = [zhconv.convert(x, "zh-cn") for x in rule_json]
self.file_info = {}
self.pre_analyse = None
@@ -70,40 +136,82 @@ class RSSInfoCleaner:
# 清理原链接(中文字符替换为英文)
def clean(self):
file_name = zhconv.convert(self.Name.raw, 'zh-cn')
file_name = zhconv.convert(self.Name.raw, "zh-cn")
# 去广告
file_name = re.sub("[(\[【]?(字幕)?[\u4e00-\u9fa5、]{0,3}(新人|招募?新?)[\u4e00-\u9fa5、]{0,8}[)\]】]?", "", file_name)
file_name = re.sub(
"[(\[【]?(字幕)?[\u4e00-\u9fa5、]{0,3}(新人|招募?新?)[\u4e00-\u9fa5、]{0,8}[)\]】]?",
"",
file_name,
)
# 除杂
file_name = re.sub("[(\[【]?★?((网飞)?\d{4}年[春夏秋冬]?)?[\d一二三四五六七八九十]{1,2}月新?番?★?[)\]】]?", "", file_name)
file_name = re.sub(
"[(\[【]?★?((网飞)?\d{4}年[春夏秋冬]?)?[\d一二三四五六七八九十]{1,2}月新?番?★?[)\]】]?",
"",
file_name,
)
# 除杂x2
file_name = re.sub("[(\[【 ](2\d{3})[)\]】 ]", " ", file_name)
# 除杂x3
file_name = re.sub("[(\[【]?((网飞)?2(\d{3}[年.][春夏秋冬]?)\d{1,2}\.?\d{1,2})[)\]】]?", "", file_name)
file_name = re.sub(
"[(\[【]?((网飞)?2(\d{3}[年.][春夏秋冬]?)\d{1,2}\.?\d{1,2})[)\]】]?", "", file_name
)
# 除杂x4
file_name = re.sub("[(\[【]检索.*[)\]】]?", "", file_name)
strip = ["特效歌词", "复制磁连", "兼容", "配音", "网盘", "\u200b", "[PSV&PC]", "Rv40", "R10", "Fin]", "Fin ", "[mkv]", "[]",
"", ""]
strip = [
"特效歌词",
"复制磁连",
"兼容",
"配音",
"网盘",
"\u200b",
"[PSV&PC]",
"Rv40",
"R10",
"Fin]",
"Fin ",
"[mkv]",
"[]",
"",
"",
]
file_name = del_rules(file_name, strip)
# xx_xx_xx
f_res = re.search("]?(([a-zA-Z:.。,!]{1,10})[_\[ ]){2,}", file_name)
if f_res is not None:
file_name = file_name.replace(f_res.group(), "%s/" % f_res.group().replace("_"," "))
file_name = file_name.replace(
f_res.group(), "%s/" % f_res.group().replace("_", " ")
)
# 中文_英文名_
f_res = re.search("_[a-zA-Z_ \-·、.。,!]*[_)\]】]", file_name)
# !!!重要
if f_res is not None:
file_name = file_name.replace(f_res.group(), "/%s/" % f_res.group().strip("_"))
file_name = file_name.replace(
f_res.group(), "/%s/" % f_res.group().strip("_")
)
# 日文.英文名
f_res = re.search("([\u4e00-\u9fa5\u3040-\u31ff\d:\-·、.。,!]{1,20}\.)([a-zA-Z\d:\-.。,!]{1,20} ?){2,}",
file_name)
f_res = re.search(
"([\u4e00-\u9fa5\u3040-\u31ff\d:\-·、.。,!]{1,20}\.)([a-zA-Z\d:\-.。,!]{1,20} ?){2,}",
file_name,
)
if f_res is not None:
file_name = file_name.replace(f_res.group(1), "%s/" % f_res.group(1).strip("."))
file_name = file_name.replace(
f_res.group(1), "%s/" % f_res.group(1).strip(".")
)
self.Name.raw = str(file_name).replace('', ':').replace('', '[').replace('', ']').replace('-', '-') \
.replace('', '(').replace('', ')').replace("", "&").replace("X", "x").replace("×", "x") \
.replace("", "x").replace("__", "/")
self.Name.raw = (
str(file_name)
.replace("", ":")
.replace("", "[")
.replace("", "]")
.replace("-", "-")
.replace("", "(")
.replace("", ")")
.replace("", "&")
.replace("X", "x")
.replace("×", "x")
.replace("", "x")
.replace("__", "/")
)
# 检索字幕组特征
def recognize_group(self):
@@ -115,8 +223,14 @@ class RSSInfoCleaner:
# !强规则,人工录入标准名,区分大小写,优先匹配
for char in rule:
if ("&" + char) in self.file_name or (char + "&") in self.file_name:
self.pre_analyse = re.search("[(\[【]?(.*?(&%s|%s&).*?)[)\]】]?" % (char, char), self.file_name).group(
1).lower()
self.pre_analyse = (
re.search(
"[(\[【]?(.*?(&%s|%s&).*?)[)\]】]?" % (char, char),
self.file_name,
)
.group(1)
.lower()
)
return "enforce"
else:
if char in self.file_name:
@@ -127,13 +241,19 @@ class RSSInfoCleaner:
str_split = self.Name.raw.lower().split("]")
# 检索特征值是否位于文件名第1、2、最后一段
for char in character:
if char in str_split[0] or char in str_split[1] or char in str_split[-1]:
if (
char in str_split[0]
or char in str_split[1]
or char in str_split[-1]
):
self.pre_analyse = char
return "success"
# 文件名是否为 [字幕组名&字幕组名&字幕组名] ,求求了,一集的工作量真的需要三个组一起做吗
if "&" in str_split[0]:
# 限制匹配长度防止出bug
self.pre_analyse = str_split[0][1:] if len(str_split[0][1:]) < 15 else None
self.pre_analyse = (
str_split[0][1:] if len(str_split[0][1:]) < 15 else None
)
return "special"
# 再匹配不上我就麻了
self.pre_analyse = None
@@ -176,7 +296,10 @@ class RSSInfoCleaner:
gp = get_gp(res_char, self.Name.raw.lower())
return gp
except Exception as e:
logging.warning("bug -- res_char:%s,%s,%s" % (res_char, self.Name.raw.lower(), e))
logger.warning(
"bug -- res_char:%s,%s,%s"
% (res_char, self.Name.raw.lower(), e)
)
else:
return res_char
# 再见
@@ -185,11 +308,46 @@ class RSSInfoCleaner:
# 扒了6W数据硬找的参数没啥说的
def get_dpi(self):
file_name = self.Name.raw
dpi_list = ["4k", "2160p", "1440p", "1080p", "1036p", "816p", "810p", "720p", "576p", "544P", "540p", "480p",
"1080i", "1080+", "360p",
"3840x2160", "1920x1080", "1920x1036", "1920x804", "1920x800", "1536x864", "1452x1080", "1440x1080",
"1280x720", "1272x720", "1255x940", "1024x768", "1024X576", "960x720", "948x720", "896x672",
"872x480", "848X480", "832x624", "704x528", "640x480", "mp4_1080", "mp4_720"]
dpi_list = [
"4k",
"2160p",
"1440p",
"1080p",
"1036p",
"816p",
"810p",
"720p",
"576p",
"544P",
"540p",
"480p",
"1080i",
"1080+",
"360p",
"3840x2160",
"1920x1080",
"1920x1036",
"1920x804",
"1920x800",
"1536x864",
"1452x1080",
"1440x1080",
"1280x720",
"1272x720",
"1255x940",
"1024x768",
"1024X576",
"960x720",
"948x720",
"896x672",
"872x480",
"848X480",
"832x624",
"704x528",
"640x480",
"mp4_1080",
"mp4_720",
]
for i in dpi_list:
dpi = str(file_name).lower().find(i)
if dpi > 0:
@@ -203,23 +361,32 @@ class RSSInfoCleaner:
# 中文标示
try:
lang.append(
re.search("[(\[【 ]((tvb)?([粤简繁英俄][日中文体&/]?[_&]?){1,5})[)\]】]?", str(file_name)).group(
1).strip(" "))
re.search(
"[(\[【 ]((tvb)?([粤简繁英俄][日中文体&/]?[_&]?){1,5})[)\]】]?",
str(file_name),
)
.group(1)
.strip(" ")
)
except Exception as e:
logging.info(e)
logger.info(e)
# 中文标示
try:
lang.append(
re.search("[(\[【]?[粤中简繁英俄日文体](双?(语|字幕))[)\]】]?", str(file_name)).group(
1).strip(" "))
re.search("[(\[【]?[粤中简繁英俄日文体](双?(语|字幕))[)\]】]?", str(file_name))
.group(1)
.strip(" ")
)
except Exception as e:
logging.info(e)
logger.info(e)
# 英文标示
try:
lang = lang + re.search("[(\[【]?(((G?BIG5|CHT|CHS|GB|JPN?|CN)[/ _]?){1,3})[)\]】]?", str(file_name)).group(
1).lower().strip(" ").split(" ")
lang = lang + re.search(
"[(\[【]?(((G?BIG5|CHT|CHS|GB|JPN?|CN)[/ _]?){1,3})[)\]】]?",
str(file_name),
).group(1).lower().strip(" ").split(" ")
except Exception as e:
logging.info(e)
logger.info(e)
if lang:
return lang
else:
@@ -231,10 +398,16 @@ class RSSInfoCleaner:
type_list = []
# 英文标示
try:
type_list.append(re.search("[(\[【]?(((mp4|mkv|mp3)[ -]?){1,3})[)\]】]?",
str(file_name).lower()).group(1).strip(" "))
type_list.append(
re.search(
"[(\[【]?(((mp4|mkv|mp3)[ -]?){1,3})[)\]】]?",
str(file_name).lower(),
)
.group(1)
.strip(" ")
)
except Exception as e:
logging.info(e)
logger.info(e)
if type_list:
return type_list
else:
@@ -248,23 +421,25 @@ class RSSInfoCleaner:
try:
code = code + re.search(
"[(\[【]?([ _-]?([xh]26[45]|hevc|avc)){1,5}[ )\]】]?",
str(file_name).lower()).group(1).split(" ")
str(file_name).lower(),
).group(1).split(" ")
except Exception as e:
logging.info(e)
logger.info(e)
# 位深
try:
code = code + re.search(
"[(\[【]?[ _-]?((10|8)[ -]?bit)[ )\]】]?",
str(file_name).lower()).group(1).split(" ")
"[(\[【]?[ _-]?((10|8)[ -]?bit)[ )\]】]?", str(file_name).lower()
).group(1).split(" ")
except Exception as e:
logging.info(e)
logger.info(e)
# 音频编码
try:
code = code + re.search(
"[(\[【]?(([ _-]?((flac(x\d)?|aac|mp3|opus)(x\d)?)){1,5})[ )\]】]?",
str(file_name).lower()).group(3).split(" ")
str(file_name).lower(),
).group(3).split(" ")
except Exception as e:
logging.info(e)
logger.info(e)
if code:
return code
else:
@@ -277,13 +452,19 @@ class RSSInfoCleaner:
# 英文标示
for _ in range(3):
try:
res = re.search(
"[(\[【]?((bd|dvd|hd|remux|(viu)?tvb?|ani-one|bilibili|网飞(动漫)|b-?global|baha|web[ /-]?(dl|rip))[ -]?(b[o0]x|iso|mut|rip)?)[)\]】]?",
file_name).group(1).lower().strip(" ")
res = (
re.search(
"[(\[【]?((bd|dvd|hd|remux|(viu)?tvb?|ani-one|bilibili|网飞(动漫)|b-?global|baha|web[ /-]?(dl|rip))[ -]?(b[o0]x|iso|mut|rip)?)[)\]】]?",
file_name,
)
.group(1)
.lower()
.strip(" ")
)
if res not in type_list:
type_list.append(res)
except Exception as e:
logging.info(e)
logger.info(e)
for res in type_list:
file_name = file_name.replace(res, "")
if type_list:
@@ -297,15 +478,25 @@ class RSSInfoCleaner:
season = []
# 中文标示
try:
season.append(re.search(" ?(第?(\d{1,2}|[一二三])(部|季|季度|丁目))", str(file_name)).group(1).strip(" "))
season.append(
re.search(" ?(第?(\d{1,2}|[一二三])(部|季|季度|丁目))", str(file_name))
.group(1)
.strip(" ")
)
except Exception as e:
logging.info(e)
logger.info(e)
# 英文标示
try:
season.append(
re.search("((final ?)?(season|[ \[]s) ?\d{1,2}|\d{1,2}-?choume)", str(file_name)).group(1).strip(" "))
re.search(
"((final ?)?(season|[ \[]s) ?\d{1,2}|\d{1,2}-?choume)",
str(file_name),
)
.group(1)
.strip(" ")
)
except Exception as e:
logging.info(e)
logger.info(e)
if season:
return season
else:
@@ -318,39 +509,55 @@ class RSSInfoCleaner:
# _集国漫
try:
episode.append(
re.search("(_((\d+集-)?\d+集)|[ (\[第]\d+-\d+ ?)", str(file_name)).group(1))
re.search("(_((\d+集-)?\d+集)|[ (\[第]\d+-\d+ ?)", str(file_name)).group(1)
)
return episode
except Exception as e:
logging.info(e)
logger.info(e)
# [10 11]集点名批评这种命名方法,几个国漫的组
try:
episode.append(
re.search("[\[( ](\d{1,3}[- &_]\d{1,3}) ?(fin| Fin|\(全集\))?[ )\]]", str(file_name)).group(1))
re.search(
"[\[( ](\d{1,3}[- &_]\d{1,3}) ?(fin| Fin|\(全集\))?[ )\]]",
str(file_name),
).group(1)
)
return episode
except Exception as e:
logging.info(e)
logger.info(e)
# 这里匹配ova 剧场版 不带集数的合集 之类的
try:
episode.append(
re.search("[\[ 第](_\d{1,3}集|ova|剧场版|全|OVA ?\d{0,2}|合|[一二三四五六七八九十])[集话章 \]\[]", str(file_name)).group(1))
re.search(
"[\[ 第](_\d{1,3}集|ova|剧场版|全|OVA ?\d{0,2}|合|[一二三四五六七八九十])[集话章 \]\[]",
str(file_name),
).group(1)
)
return episode
except Exception as e:
logging.info(e)
logger.info(e)
# 标准单集 sp单集
try:
episode.append(
re.search("[\[ 第e]((sp|(数码)?重映)?(1?\d{1,3}(\.\d)?|1?\d{1,3}\(1?\d{1,3}\)))(v\d)?[集话章 \]\[]",
str(file_name)).group(1))
re.search(
"[\[ 第e]((sp|(数码)?重映)?(1?\d{1,3}(\.\d)?|1?\d{1,3}\(1?\d{1,3}\)))(v\d)?[集话章 \]\[]",
str(file_name),
).group(1)
)
return episode
except Exception as e:
logging.info(e)
logger.info(e)
# xx-xx集
try:
episode.append(
re.search("[\[ 第(]((合集)?\\\)?(\d{1,3}[ &]\d{1,3})(话| |]|\(全集\)|全集|fin)", str(file_name)).group(1))
re.search(
"[\[ 第(]((合集)?\\\)?(\d{1,3}[ &]\d{1,3})(话| |]|\(全集\)|全集|fin)",
str(file_name),
).group(1)
)
return episode
except Exception as e:
logging.info(e)
logger.info(e)
return None
# 获取版本
@@ -360,22 +567,28 @@ class RSSInfoCleaner:
# 中文
try:
vision.append(
re.search("[(\[【]?(([\u4e00-\u9fa5]{0,5}|v\d)((版本?|修[复正]|WEB限定)|片源?|内详|(特别篇))(话|版|合?集?))[)\]】]?",
str(file_name)).group(1))
re.search(
"[(\[【]?(([\u4e00-\u9fa5]{0,5}|v\d)((版本?|修[复正]|WEB限定)|片源?|内详|(特别篇))(话|版|合?集?))[)\]】]?",
str(file_name),
).group(1)
)
except Exception as e:
logging.info(e)
logger.info(e)
# 英文
try:
vision.append(
re.search("[(\[【 ]\d{1,2}((v\d)((版本?|修复?正?版)|片源?|内详)?)[)\]】]", str(file_name)).group(1))
re.search(
"[(\[【 ]\d{1,2}((v\d)((版本?|修复?正?版)|片源?|内详)?)[)\]】]",
str(file_name),
).group(1)
)
except Exception as e:
logging.info(e)
logger.info(e)
# [v2]
try:
vision.append(
re.search("[(\[【 ](v\d)[)\]】]", str(file_name)).group(1))
vision.append(re.search("[(\[【 ](v\d)[)\]】]", str(file_name)).group(1))
except Exception as e:
logging.info(e)
logger.info(e)
if vision:
return vision
else:
@@ -388,15 +601,24 @@ class RSSInfoCleaner:
# 中文标示
try:
ass.append(
re.search("[(\[【]?(附?([内外][挂嵌封][+&]?){1,2}(字幕|[简中日英]*音轨)?)[)\]】]?", str(file_name)).group(1))
re.search(
"[(\[【]?(附?([内外][挂嵌封][+&]?){1,2}(字幕|[简中日英]*音轨)?)[)\]】]?",
str(file_name),
).group(1)
)
except Exception as e:
logging.info(e)
logger.info(e)
# 英文标示
try:
ass.append(
re.search("[ (\[【+](([ +]?(ass|pgs|srt)){1,3})[)\]】]?", str(file_name)).group(1).strip(" "))
re.search(
"[ (\[【+](([ +]?(ass|pgs|srt)){1,3})[)\]】]?", str(file_name)
)
.group(1)
.strip(" ")
)
except Exception as e:
logging.info(e)
logger.info(e)
if ass:
return ass
else:
@@ -442,9 +664,15 @@ class RSSInfoCleaner:
# 混合验证
def all_verity(self, raw_name):
self.zh_list = re_verity(self.zh_list, raw_name) if self.zh_list is not None else None
self.en_list = re_verity(self.en_list, raw_name) if self.en_list is not None else None
self.jp_list = re_verity(self.jp_list, raw_name) if self.jp_list is not None else None
self.zh_list = (
re_verity(self.zh_list, raw_name) if self.zh_list is not None else None
)
self.en_list = (
re_verity(self.en_list, raw_name) if self.en_list is not None else None
)
self.jp_list = (
re_verity(self.jp_list, raw_name) if self.jp_list is not None else None
)
# 汇总信息
def get_clean_name(self):
@@ -459,7 +687,7 @@ class RSSInfoCleaner:
"ass": self.Tag.ass,
"type": self.Tag.type,
"code": self.Tag.code,
"source": self.Tag.source
"source": self.Tag.source,
}
# 字母全部小写
clean_name = self.Name.raw.lower()
@@ -469,21 +697,42 @@ class RSSInfoCleaner:
if v is not None:
if type(v) is list:
for i in v:
clean_name = clean_name.replace(i, "") if i is not None else clean_name
clean_name = (
clean_name.replace(i, "") if i is not None else clean_name
)
else:
clean_name = clean_name.replace(v, "")
# 除杂
x_list = ["pc&psp", "pc&psv", "movie", "bangumi.online", "donghua", "[_]",
"仅限港澳台地区", "话全", "第话", "第集", "全集", "字幕", "", "", "", "+", "@"]
x_list = [
"pc&psp",
"pc&psv",
"movie",
"bangumi.online",
"donghua",
"[_]",
"仅限港澳台地区",
"话全",
"第话",
"第集",
"全集",
"字幕",
"",
"",
"",
"+",
"@",
]
for i in x_list:
clean_name = clean_name.replace(i, "")
# 去除多余空格
clean_name = re.sub(' +', ' ', clean_name).strip(" ").strip("-").strip(" ")
clean_name = re.sub(" +", " ", clean_name).strip(" ").strip("-").strip(" ")
# 去除空括号
# !!! 不能删
clean_name = clean_name.replace("][", "/")
xx = re.search("[\u4e00-\u9fa5\u3040-\u31ff ]([(\[。_])[\u4e00-\u9fa5\a-z]", clean_name)
xx = re.search(
"[\u4e00-\u9fa5\u3040-\u31ff ]([(\[。_])[\u4e00-\u9fa5\a-z]", clean_name
)
if xx is not None:
clean_name = clean_name.replace(xx.group(1), "/")
clean_name = re.sub("([(\[] *| *[)\]])", "", clean_name)
@@ -497,7 +746,9 @@ class RSSInfoCleaner:
self.Name.zh, self.Name.en, self.Name.jp = None, None, None
# 国漫筛选
if "国漫" in self.Name.raw:
zh = re.search("-?([\u4e00-\u9fa5]{2,10})_?", self.Name.raw.replace("[国漫]", ""))
zh = re.search(
"-?([\u4e00-\u9fa5]{2,10})_?", self.Name.raw.replace("[国漫]", "")
)
if zh is not None:
self.Name.zh = clean_list([zh.group()])
return
@@ -508,12 +759,17 @@ class RSSInfoCleaner:
if en is not None:
self.Name.en = clean_list([en.group()])
return
elif re.search(
elif (
re.search(
"(^[\u4e00-\u9fa5\u3040-\u31ff\d:\-·??、.。,!]{1,20}[a-z\d]{,3} ??)([a-z\d:\-.。,! ]* ?)",
self.Name.clean) is not None:
self.Name.clean,
)
is not None
):
res = re.search(
"(^[\u4e00-\u9fa5\u3040-\u31ff\d:\-·??、.。,!]{1,20}[a-z\d]{,3} ??)[._&]?([a-z\d:\-.。,! ]* ?)",
self.Name.clean)
self.Name.clean,
)
zh = res.group(1)
en = res.group(2)
zh = re.search(zh, self.Name.raw.lower())
@@ -524,12 +780,17 @@ class RSSInfoCleaner:
self.Name.en = clean_list([en.group()])
return
# 英中
elif re.search(
elif (
re.search(
"(^([a-z\d:\-_.。,! ]* ?) ?)[._&]?([\u4e00-\u9fa5\u3040-\u31ffa-z\d:\-_·??、.。,! ]{1,20})",
self.Name.clean) is not None:
self.Name.clean,
)
is not None
):
res = re.search(
"(^([a-z\d:\-_.。,! ]* ?) ?)[._&]?([\u4e00-\u9fa5\u3040-\u31ffa-z\d:\-_·??、.。,! ]{1,20})",
self.Name.clean)
self.Name.clean,
)
zh = res.group(3)
en = res.group(1)
@@ -547,7 +808,9 @@ class RSSInfoCleaner:
return
if debug > 0:
print("初筛:\r\n%s\r\n%s\r\n%s" % (self.zh_list, self.en_list, self.jp_list))
if (has_zh(self.Name.clean) or has_jp(self.Name.clean)) and has_en(self.Name.clean):
if (has_zh(self.Name.clean) or has_jp(self.Name.clean)) and has_en(
self.Name.clean
):
self.Name.clean = add_separator(self.Name.clean)
self.easy_split(self.Name.clean, self.zh_list, self.en_list, self.jp_list)
@@ -586,7 +849,7 @@ class RSSInfoCleaner:
self.en_list.remove(i)
self.zh_list.append(res.group())
except Exception as e:
logging.info(e)
logger.info(e)
if debug > 0:
print("拼合:\r\n%s\r\n%s\r\n%s" % (self.zh_list, self.en_list, self.jp_list))
# 再次验证这里只能验raw名

View File

@@ -3,6 +3,7 @@ import logging
import csv
logger = logging.getLogger(__name__)
def read_data(file_name, start, rows):
if file_name == "mikan":
@@ -27,7 +28,7 @@ def add_separator(clean_name):
clean_name).group(1)
clean_name = clean_name.replace(res, res.strip(" ") + "/")
except Exception as e:
logging.info(e)
logger.exception(e)
else:
try:
res = re.search(
@@ -35,9 +36,9 @@ def add_separator(clean_name):
clean_name).group(1)
clean_name = clean_name.replace(res, res.strip(" ") + "/")
except Exception as e:
logging.info(e)
logger.exception(e)
except Exception as e:
logging.info(e)
logger.exception(e)
clean_name = re.sub("(/ */)", "/", clean_name)
return clean_name
@@ -48,10 +49,10 @@ def splicing(frag_list, name_list, raw_name):
for i in range(0, len(name_list) - 1):
if name_list[i] in name_list[i + 1] and name_list[i] != name_list[i + 1]:
name_list.remove(name_list[i])
elif raw_list[i + 1] in name_list[i] and name_list[i] != name_list[i + 1]:
elif name_list[i + 1] in name_list[i] and name_list[i] != name_list[i + 1]:
name_list.remove(name_list[i + 1])
except Exception as e:
logging.info(e)
logger.info(e)
min_list = sorted(name_list, key=lambda i: len(i), reverse=False)
for i in range(0, len(min_list) - 1):
# 处理中英文混合名
@@ -68,10 +69,10 @@ def splicing(frag_list, name_list, raw_name):
name_list.remove(piece_name)
name_list.append(r_name.group())
except Exception as e:
logging.warning("bug--%s" % e)
logging.warning("piece_name:%s,fragment:%s" % (piece_name, fragment))
logger.warning("bug--%s" % e)
logger.warning("piece_name:%s,fragment:%s" % (piece_name, fragment))
except Exception as e:
print(e)
logger.exception(e)
# 清理列表
@@ -89,7 +90,7 @@ def clean_list(raw_list):
elif raw_list[i + 1] in raw_list[i] and raw_list[i] != raw_list[i + 1]:
raw_list.remove(raw_list[i + 1])
except Exception as e:
logging.info(e)
logger.info(e)
if raw_list is not None and len(raw_list) > 1:
try:
for i in range(0, len(raw_list)):
@@ -97,7 +98,7 @@ def clean_list(raw_list):
if up_list[i] in up_list[-1] and up_list[i] != up_list[-1]:
raw_list.remove(up_list[i])
except Exception as e:
logging.info(e)
logger.info(e)
if raw_list:
return set(raw_list)
else:
@@ -119,7 +120,7 @@ def extract_title(raw_name):
title["zh"] = res.group(1).strip(" ")
title["en"] = res.group(3).strip(" ")
except Exception as e:
logging.info(e)
logger.info(e)
# 本程序依赖此bug运行这行不能删
if title["zh"] is None:
# 中英
@@ -130,7 +131,7 @@ def extract_title(raw_name):
title["zh"] = res.group(1).strip(" ")
title["en"] = res.group(3).strip(" ")
except Exception as e:
logging.info(e)
logger.info(e)
# 英中
try:
res = re.search(
@@ -139,7 +140,7 @@ def extract_title(raw_name):
title["en"] = res.group(1).strip(" ")
title["zh"] = res.group(3).strip(" ")
except Exception as e:
logging.info(e)
logger.info(e)
else:
if has_zh(clean_name):
# 中文
@@ -147,14 +148,14 @@ def extract_title(raw_name):
res = re.search("(([\u4e00-\u9fa5:]{2,15}[ /]?){1,5}) *", clean_name)
title["zh"] = res.group(1).strip(" ")
except Exception as e:
logging.info(e)
logger.info(e)
elif has_en(clean_name):
# 英文
try:
res = re.search("(([a-z:]{2,15}[ /]?){1,15}) *", clean_name)
title["en"] = res.group(1).strip(" ")
except Exception as e:
logging.info(e)
logger.info(e)
for k, v in title.items():
if v is not None and "/" in v:
zh_list = v.split("/")

View File

@@ -1,13 +1,16 @@
import re
import logging
from env import EnvInfo
import qbittorrentapi
import json
import os
logger = logging.getLogger(__name__)
class SetRule:
def __init__(self):
with open(EnvInfo.info_path) as f:
with open(EnvInfo.info_path, encoding="utf-8") as f:
self.info = json.load(f)
self.bangumi_info = self.info["bangumi_info"]
self.rss_link = EnvInfo.rss_link
@@ -15,11 +18,12 @@ class SetRule:
self.user_name = EnvInfo.user_name
self.password = EnvInfo.password
self.download_path = EnvInfo.download_path
self.qb = qbittorrentapi.Client(host=self.host_ip, username=self.user_name, password=self.password)
self.qb = qbittorrentapi.Client(
host=self.host_ip, username=self.user_name, password=self.password)
try:
self.qb.auth_log_in()
except qbittorrentapi.LoginFailed as e:
print(e)
logger.exception(e)
def set_rule(self, bangumi_name, group, season):
rule = {
@@ -36,7 +40,7 @@ class SetRule:
'addPaused': False,
'assignedCategory': 'Bangumi',
'savePath': str(os.path.join(EnvInfo.download_path, re.sub(EnvInfo.rule_name_re, " ", bangumi_name).strip(), season))
}
}
if EnvInfo.enable_group_tag:
rule_name = f"[{group}] {bangumi_name}"
else:
@@ -47,27 +51,27 @@ class SetRule:
try:
self.qb.rss_remove_item(item_path="Mikan_RSS")
except qbittorrentapi.exceptions.Conflict409Error:
print(f"[{EnvInfo.time_show_obj}] No feed exists, starting adding feed.")
logger.debug("No feed exists, starting adding feed.")
try:
self.qb.rss_add_feed(url=self.rss_link, item_path="Mikan_RSS")
print(f"[{EnvInfo.time_show_obj}] Successes adding RSS Feed.")
logger.debug("Successes adding RSS Feed.")
except ConnectionError:
print(f"[{EnvInfo.time_show_obj}] Error with adding RSS Feed.")
logger.debug("Error with adding RSS Feed.")
except qbittorrentapi.exceptions.Conflict409Error:
print(f"[{EnvInfo.time_show_obj}] RSS Already exists.")
logger.debug("RSS Already exists.")
def run(self):
print(f"[{EnvInfo.time_show_obj}] Start adding rules.")
logger.debug("Start adding rules.")
for info in self.bangumi_info:
if not info["added"]:
self.set_rule(info["title"], info["group"], info["season"])
info["added"] = True
with open(EnvInfo.info_path, 'w', encoding='utf8') as f:
json.dump(self.info, f, indent=4, separators=(',', ': '), ensure_ascii=False)
print(f"[{EnvInfo.time_show_obj}] Finished.")
json.dump(self.info, f, indent=4, separators=(
',', ': '), ensure_ascii=False)
logger.debug("Finished.")
if __name__ == "__main__":
put = SetRule()
put.run()

View File

@@ -1,6 +1,7 @@
# -*- coding: UTF-8 -*-
import os
import sys
import logging
import requests
from bs4 import BeautifulSoup
import json
@@ -8,34 +9,40 @@ import re
from env import EnvInfo, BColors
from RSSFilter import RSSInfoCleaner as Filter
logger = logging.getLogger(__name__)
class MatchRule:
split_rule = r"\[|\]|\【|\】|\★|\|\|\(|\)"
last_rule = r"(.*)( \-)"
sub_title = r"[^\x00-\xff]{1,}| \d{1,2}^.*|\·"
match_rule = r"(S\d{1,2}(.*))"
season_match = r"(.*)(Season \d{1,2}|S\d{1,2}|第.*季|第.*期)"
season_number_match = r"(\d+)"
class CollectRSS:
def __init__(self):
self.bangumi_list = []
with open(EnvInfo.rule_path) as r:
with open(EnvInfo.rule_path, encoding="utf-8") as r:
self.rules = json.load(r)
try:
self.rules = requests.get(EnvInfo.rule_url).json()
with open(EnvInfo.rule_path, 'w') as f:
json.dump(self.rules, f, indent=4, separators=(',', ': '), ensure_ascii=False)
with open(EnvInfo.rule_path, "w", encoding="utf-8") as f:
json.dump(
self.rules, f, indent=4, separators=(",", ": "), ensure_ascii=False
)
except:
with open(EnvInfo.rule_path) as r:
with open(EnvInfo.rule_path, encoding="utf-8") as r:
self.rules = json.load(r)
try:
rss = requests.get(EnvInfo.rss_link, 'utf-8')
rss = requests.get(EnvInfo.rss_link, "utf-8")
except:
print(f"[{EnvInfo.time_show_obj}] ERROR with DNS/Connection.")
logger.debug("ERROR with DNS/Connection.")
quit()
soup = BeautifulSoup(rss.text, 'xml')
self.items = soup.find_all('item')
with open(EnvInfo.info_path) as i:
soup = BeautifulSoup(rss.text, "xml")
self.items = soup.find_all("item")
with open(EnvInfo.info_path, encoding="utf-8") as i:
self.info = json.load(i)
def get_info_list(self):
@@ -43,40 +50,43 @@ class CollectRSS:
name = item.title.string
# debug 用
if EnvInfo.get_rule_debug:
sys.stdout.write(f"[{EnvInfo.time_show_obj}] Raw {name}")
logger.debug(f"Raw {name}")
exit_flag = False
for rule in self.rules:
for group in rule["group_name"]:
if re.search(group, name):
exit_flag = True
n = re.split(MatchRule.split_rule, name)
while '' in n:
n.remove('')
while ' ' in n:
n.remove(' ')
while "" in n:
n.remove("")
while " " in n:
n.remove(" ")
try:
bangumi_title = n[rule['name_position']].strip()
bangumi_title = n[rule["name_position"]].strip()
except IndexError:
continue
sub_title = re.sub(MatchRule.sub_title, "", bangumi_title)
sub_title = re.sub(
MatchRule.sub_title, "", bangumi_title)
b = re.split(r"\/|\_", sub_title)
while '' in b:
b.remove('')
pre_name = max(b, key=len, default='').strip()
while "" in b:
b.remove("")
pre_name = max(b, key=len, default="").strip()
if len(pre_name.encode()) > 3:
bangumi_title = pre_name
for i in range(2):
match_obj = re.match(MatchRule.last_rule, bangumi_title, re.I)
match_obj = re.match(
MatchRule.last_rule, bangumi_title, re.I
)
if match_obj is not None:
bangumi_title = match_obj.group(1).strip()
match_obj = re.match(MatchRule.match_rule, bangumi_title, re.I)
match_obj = re.match(
MatchRule.match_rule, bangumi_title, re.I)
if match_obj is not None:
bangumi_title = match_obj.group(2).strip()
if bangumi_title not in self.bangumi_list:
self.bangumi_list.append({
"title": bangumi_title,
"group": group
})
self.bangumi_list.append(
{"title": bangumi_title, "group": group}
)
# debug
# print(bangumi_title)
# print(group)
@@ -84,7 +94,7 @@ class CollectRSS:
if exit_flag:
break
if not exit_flag:
print(f"[{EnvInfo.time_show_obj}] ERROR Not match with {name}")
logger.debug("ERROR Not match with {name}")
def put_info_json(self):
had_data = []
@@ -92,29 +102,43 @@ class CollectRSS:
for data in self.info["bangumi_info"]:
had_data.append(data["title"])
else:
self.info = {
"rss_link": EnvInfo.rss_link,
"bangumi_info": []
}
self.info = {"rss_link": EnvInfo.rss_link, "bangumi_info": []}
for item in self.bangumi_list:
match_title_season = re.match(MatchRule.season_match, item["title"], re.I)
title = item["title"]
match_title_season = re.match(MatchRule.season_match, title, re.I)
if match_title_season is not None:
json_title = match_title_season.group(1).strip()
json_season = match_title_season.group(2)
match_season_number = re.findall(
MatchRule.season_number_match, json_season
)
if len(match_season_number) != 0:
json_season_number = int(match_season_number[0])
else:
logger.warning(
f"title:{title} season:{json_season} can't match season in number"
)
json_season_number = 1
else:
json_season = 'S01'
json_title = item["title"]
json_season = "S01"
json_season_number = 1
json_title = title
if json_title not in had_data:
self.info["bangumi_info"].append({
"title": json_title,
"season": json_season,
"group": item["group"],
"added": False
})
self.info["bangumi_info"].append(
{
"title": json_title,
"season": json_season,
"season_number": json_season_number,
"group": item["group"],
"added": False,
}
)
had_data.append(json_title)
print(f"[{EnvInfo.time_show_obj}] add {json_title} {json_season}")
with open(EnvInfo.info_path, 'w', encoding='utf8') as f:
json.dump(self.info, f, indent=4, separators=(',', ': '), ensure_ascii=False)
logger.debug("add {json_title} {json_season}")
with open(EnvInfo.info_path, "w", encoding="utf8") as f:
json.dump(
self.info, f, indent=4, separators=(",", ": "), ensure_ascii=False
)
def run(self):
self.get_info_list()

View File

@@ -1,22 +1,36 @@
import os
import time
import json
import logging
from collect_bangumi_info import CollectRSS
from auto_set_rule import SetRule
from rename_qb import qBittorrentRename
import json
from env import EnvInfo
def setup_logger():
    """Configure the root logger: DEBUG level, timestamped single-line records."""
    logging.basicConfig(
        level=logging.DEBUG,
        datefmt="%Y-%m-%d %X",
        format="%(asctime)s %(levelname)s: %(message)s",
        encoding="utf-8",
    )
def create_data_file():
    """Create the bangumi info JSON file with an empty skeleton if it does not exist yet."""
    if not os.path.exists(EnvInfo.info_path):
        bangumi_info = {"rss_link": "", "bangumi_info": []}
        # utf-8 is required here: ensure_ascii=False may emit non-ASCII titles,
        # which would raise UnicodeEncodeError under a non-UTF-8 default locale.
        # (The rest of the project already opens this file with encoding="utf-8".)
        with open(EnvInfo.info_path, "w", encoding="utf-8") as i:
            json.dump(
                bangumi_info, i, indent=4, separators=(",", ": "), ensure_ascii=False
            )
if __name__ == "__main__":
setup_logger()
create_data_file()
SetRule().rss_feed()
while True:

View File

@@ -36,9 +36,6 @@ class EnvInfo:
# Static ENV
rule_url = "https://raw.githubusercontent.com/EstrellaXD/Bangumi_Auto_Collector/main/AutoBangumi/config/rule.json"
def time_show_obj(self):
return time.strftime('%Y-%m-%d %X')
rule_name_re = r"\:|\/|\."

View File

@@ -3,10 +3,13 @@ import requests
from qbittorrentapi import Client
from env import EnvInfo, Other
from bs4 import BeautifulSoup
import logging
logger = logging.getLogger(__name__)
class FullSeasonGet:
def __init__(self,group, bangumi_name, season):
def __init__(self, group, bangumi_name, season):
self.torrents = None
self.bangumi_name = bangumi_name
self.group = group
@@ -14,23 +17,29 @@ class FullSeasonGet:
def get_season_rss(self):
    """Fetch the Mikan search RSS for this group/title/season and collect torrent enclosures."""
    # Season 1 entries are searched without an explicit season token.
    season_token = "" if self.season == "S01" else self.season
    response = requests.get(
        f"https://mikanani.me/RSS/Search?searchstr={self.group}+{self.bangumi_name}+{season_token}"
    )
    soup = BeautifulSoup(response.content, "xml")
    self.torrents = soup.find_all("enclosure")
def add_torrents(self):
    """Log in to qBittorrent and queue every collected torrent under the Bangumi category."""
    qb = Client(
        host=EnvInfo.host_ip, username=EnvInfo.user_name, password=EnvInfo.password
    )
    try:
        qb.auth_log_in()
    except Exception as e:
        # Narrowed from a bare except; keep the best-effort behavior (the original
        # continued after a failed login) but record the actual reason.
        logger.error(f"qBittorrent login failed: {e}")
    # The save path does not depend on the torrent; compute it once, not per iteration.
    save_path = str(
        os.path.join(EnvInfo.download_path, self.bangumi_name, self.season)
    )
    for torrent in self.torrents:
        qb.torrents_add(
            urls=torrent["url"],
            save_path=save_path,
            category="Bangumi",
        )
@@ -41,7 +50,7 @@ class FullSeasonGet:
if __name__ == "__main__":
    # Manual smoke test: fetch one full season and log the collected torrent URLs.
    season_get = FullSeasonGet("Lilith-Raws", "Shijou Saikyou no Daimaou", "S01")
    season_get.run()
    for torrent in season_get.torrents:
        logger.debug(torrent["url"])

View File

@@ -1,95 +1,84 @@
import re
import sys
import qbittorrentapi
import time
import logging
from env import EnvInfo
logger = logging.getLogger(__name__)
class qBittorrentRename:
def __init__(self):
    """Connect to qBittorrent and prepare the completed-Bangumi torrent list and rename rules."""
    self.qbt_client = qbittorrentapi.Client(
        host=EnvInfo.host_ip, username=EnvInfo.user_name, password=EnvInfo.password
    )
    try:
        self.qbt_client.auth_log_in()
    except qbittorrentapi.LoginFailed as e:
        logger.exception(e)
    self.recent_info = self.qbt_client.torrents_info(
        status_filter="completed", category="Bangumi"
    )
    self.count = 0
    self.rename_count = 0
    self.torrent_count = len(self.recent_info)
    rules = [
        r"(.*)\[(\d{1,3}|\d{1,3}\.\d{1,2})(?:v\d{1,2})?(?:END)?\](.*)",
        r"(.*)\[E(\d{1,3}|\d{1,3}\.\d{1,2})(?:v\d{1,2})?(?:END)?\](.*)",
        r"(.*)\[第(\d*\.*\d*)(?:END)?\](.*)",
        # NOTE(review): the next pattern duplicates the previous one exactly,
        # so it can never match anything new — kept for fidelity, candidate for removal.
        r"(.*)\[第(\d*\.*\d*)(?:END)?\](.*)",
        r"(.*)第(\d*\.*\d*)(?:END)?(.*)",
        r"(.*)(\d*\.*\d*)話(?:END)?(.*)",
        r"(.*)- (\d{1,3}|\d{1,3}\.\d{1,2})(?:v\d{1,2})?(?:END)? (.*)",
    ]
    # Compile with re.I: the pre-refactor code matched these rules with re.I at
    # call time, which the refactor dropped (E/END/v must stay case-insensitive).
    self.rules = [re.compile(rule, re.I) for rule in rules]
def rename_normal(self, name):
    """Return *name* reformatted as '<title> E<episode><rest>', or None if no rule matches."""
    for rule in self.rules:
        # BUG FIX: rule.match(name, re.I) passed re.I (== 2) as the *pos*
        # argument of the compiled pattern, silently skipping the first two
        # characters. Pass re.I as an actual flag instead.
        matchObj = re.match(rule.pattern, name, re.I)
        if matchObj is not None:
            new_name = (
                f"{matchObj.group(1).strip()} E{matchObj.group(2)}{matchObj.group(3)}"
            )
            return new_name
def rename_pn(self, name):
    """Strip the leading [group] tag from *name*, then return '<title> E<ep><extension>'.

    Returns None when no rename rule matches the de-tagged name.
    """
    n = re.split(r"\[|\]", name)
    # n[1] is the first bracketed token (the release group); drop it.
    file_name = name.replace(f"[{n[1]}]", "")
    for rule in self.rules:
        # BUG FIX: rule.match(file_name, re.I) passed re.I (== 2) as the *pos*
        # argument of the compiled pattern, skipping the first two characters.
        matchObj = re.match(rule.pattern, file_name, re.I)
        if matchObj is not None:
            new_name = re.sub(
                r"\[|\]",
                "",
                f"{matchObj.group(1).strip()} E{matchObj.group(2)}{n[-1]}",
            )
            return new_name
def rename_torrent_file(self, hash, path_name, new_name):
    """Rename the torrent's main file via qBittorrent, counting actual renames.

    Skips the call when the name is already correct, or when new_name is None
    (rename_normal/rename_pn return None when no rule matched).
    Note: parameter name `hash` shadows the builtin but is kept for interface
    compatibility with existing callers.
    """
    if new_name is None or path_name == new_name:
        return
    self.qbt_client.torrents_rename_file(
        torrent_hash=hash, old_path=path_name, new_path=new_name
    )
    logger.debug(f"{path_name} >> {new_name}")
    self.count += 1
def clear_info(self):
self.name = None
self.hash = None
self.new_name = None
self.path_name = None
def print_result(self):
    """Log a summary of how many torrents were checked and how many were renamed."""
    # Lazy %-style args avoid eager f-string interpolation in logging calls;
    # the original also used f"完成" with no placeholder (pointless f-string).
    logger.debug("已完成对%s个文件的检查", self.torrent_count)
    logger.debug("已对其中%s个文件进行重命名", self.count)
    logger.debug("完成")
def run(self):
    """Rename every recently-completed Bangumi torrent using the configured method."""
    method_dict = {"pn": self.rename_pn, "normal": self.rename_normal}
    if EnvInfo.method not in method_dict:
        logger.error("error method")
        return
    rename = method_dict[EnvInfo.method]
    # Iterate the torrents directly instead of indexing by range; with the
    # original range/bare-except form, `name` could be unbound inside the
    # handler if recent_info[i] itself raised.
    for info in self.recent_info:
        try:
            new_name = rename(info.name)
            path_name = info.content_path.split("/")[-1]
            self.rename_torrent_file(info.hash, path_name, new_name)
        except Exception:
            # Narrowed from a bare except so KeyboardInterrupt/SystemExit pass through.
            logger.warning(f"{info.name} rename fail")
    self.print_result()

View File

@@ -3,6 +3,7 @@ import re
import sys
import time
import json
import logging
import qbittorrentapi
import requests
@@ -12,6 +13,7 @@ import requests.packages.urllib3.util.ssl_
requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS = 'ALL'
logger = logging.getLogger(__name__)
class EnvInfo:
if getattr(sys, 'frozen', False):
@@ -31,7 +33,6 @@ class EnvInfo:
method = info["method"]
# rss_link = "https://mikanani.me/RSS/MyBangumi?token=Td8ceWZZv3s2OZm5ji9RoMer8vk5VS3xzC1Hmg8A26E%3d"
rule_url = "https://raw.githubusercontent.com/EstrellaXD/Bangumi_Auto_Collector/main/AutoBangumi/config/rule.json"
time_show_obj = time.strftime('%Y-%m-%d %X')
bangumi_info = info["bangumi_info"]
@@ -47,7 +48,7 @@ class SetRule:
try:
self.qb.auth_log_in()
except qbittorrentapi.LoginFailed as e:
print(e)
logger.exception(e)
def set_rule(self, bangumi_name, season):
rule = {
@@ -71,19 +72,17 @@ class SetRule:
try:
self.qb.rss_remove_item(item_path="Mikan_RSS")
self.qb.rss_add_feed(url=self.rss_link, item_path="Mikan_RSS")
sys.stdout.write(f"[{EnvInfo.time_show_obj}] Successes adding RSS Feed." + "\n")
logger.debug("Successes adding RSS Feed." + "\n")
except ConnectionError:
sys.stdout.write(f"[{EnvInfo.time_show_obj}] Error with adding RSS Feed." + "\n")
logger.debug("Error with adding RSS Feed." + "\n")
except qbittorrentapi.exceptions.Conflict409Error:
sys.stdout.write(f"[{EnvInfo.time_show_obj}] RSS Already exists." + "\n")
logger.debug("RSS Already exists." + "\n")
def run(self):
    """Add a qBittorrent download rule for every known bangumi entry."""
    # logging terminates each record with a newline already; the explicit
    # + "\n" in the original produced blank lines in the log.
    logger.debug("Start adding rules.")
    for info in self.bangumi_info:
        self.set_rule(info["title"], info["season"])
    logger.debug("Finished.")
class MatchRule:
@@ -100,7 +99,7 @@ class CollectRSS:
try:
self.rules = requests.get(EnvInfo.rule_url).json()
except ConnectionError:
sys.stdout.write(f"[{EnvInfo.time_show_obj}] Get rules Erroe=r")
logger.debug(" Get rules Erroe=r")
rss = requests.get(EnvInfo.rss_link, 'utf-8')
soup = BeautifulSoup(rss.text, 'xml')
self.items = soup.find_all('item')
@@ -147,7 +146,7 @@ class CollectRSS:
if exit_flag:
break
if not exit_flag:
print(f"[{EnvInfo.time_show_obj}] ERROR Not match with {name}")
logger.debug("ERROR Not match with {name}")
def put_info_json(self):
had_data = []
@@ -166,8 +165,7 @@ class CollectRSS:
"title": json_title,
"season": json_season
})
sys.stdout.write(f"[{EnvInfo.time_show_obj}] add {json_title} {json_season}" + "\n")
sys.stdout.flush()
logger.debug("add {json_title} {json_season}" + "\n")
EnvInfo.info["bangumi_info"] = self.info
with open(EnvInfo.path, 'w', encoding='utf8') as f:
data = json.dumps(EnvInfo.info, indent=4, separators=(',', ': '), ensure_ascii=False)
@@ -187,7 +185,7 @@ class qBittorrentRename:
try:
self.qbt_client.auth_log_in()
except qbittorrentapi.LoginFailed as e:
print(e)
logger.debug(e)
self.recent_info = self.qbt_client.torrents_info(status_filter='completed', category="Bangumi")
self.hash = None
self.name = None
@@ -229,7 +227,7 @@ class qBittorrentRename:
if self.path_name != self.new_name:
self.qbt_client.torrents_rename_file(torrent_hash=self.hash, old_path=self.path_name,
new_path=self.new_name)
sys.stdout.write(f"[{time.strftime('%Y-%m-%d %X')}] {self.path_name} >> {self.new_name}")
logger.debug(f"{self.path_name} >> {self.new_name}")
self.count += 1
else:
return
@@ -241,14 +239,13 @@ class qBittorrentRename:
self.path_name = None
def print_result(self):
sys.stdout.write(f"[{EnvInfo.time_show_obj}] 已完成对{self.torrent_count}个文件的检查" + '\n')
sys.stdout.write(f"[{EnvInfo.time_show_obj}] 已对其中{self.count}个文件进行重命名" + '\n')
sys.stdout.write(f"[{EnvInfo.time_show_obj}] 完成" + '\n')
sys.stdout.flush()
logger.debug("已完成对{self.torrent_count}个文件的检查")
logger.debug("已对其中{self.count}个文件进行重命名")
logger.debug("完成")
def run(self):
if EnvInfo.method not in ['pn', 'normal']:
print('error method')
logger.error('error method')
elif EnvInfo.method == 'normal':
for i in range(0, self.torrent_count + 1):
try:

View File

@@ -4,6 +4,9 @@ import sys
import qbittorrentapi
from os import environ
import time
import logging
logger = logging.getLogger(__name__)
host_ip = environ['HOST']
user_name = environ['USER']
@@ -22,8 +25,6 @@ episode_rules = [r'(.*)\[(\d{1,3}|\d{1,3}\.\d{1,2})(?:v\d{1,2})?(?:END)?\](.*)',
r'(.*)- (\d{1,3}|\d{1,3}\.\d{1,2})(?:v\d{1,2})?(?:END)? (.*)']
# Suffixs of files we are going to rename
suffixs = ['mp4', 'mkv', 'avi', 'mov', 'flv', 'rmvb', 'ass', 'idx']
sys.stdout = io.TextIOWrapper(buffer=sys.stdout.buffer, encoding='utf8')
class QbittorrentRename:
def __init__(self, rename_method):
@@ -31,7 +32,7 @@ class QbittorrentRename:
try:
self.qbt_client.auth_log_in()
except qbittorrentapi.LoginFailed as e:
print(e)
logger.exception(e)
self.recent_info = self.qbt_client.torrents_info(status_filter='completed')
self.hash = None
self.name = None
@@ -66,8 +67,7 @@ class QbittorrentRename:
def rename(self):
if self.path_name != self.new_name:
self.qbt_client.torrents_rename_file(torrent_hash=self.hash, old_path=self.path_name, new_path=self.new_name)
sys.stdout.write(f"[{time.strftime('%X')}] {self.path_name} >> {self.new_name}" + '\n')
sys.stdout.flush()
logger.debug("{self.path_name} >> {self.new_name}")
self.count += 1
else:
return
@@ -79,14 +79,13 @@ class QbittorrentRename:
self.path_name = None
def print_result(self):
sys.stdout.write(f"[{time.strftime('%X')}] 已完成对{self.torrent_count}个文件的检查" + '\n')
sys.stdout.write(f"[{time.strftime('%X')}] 已对其中{self.count}个文件进行重命名" + '\n')
sys.stdout.write(f"[{time.strftime('%X')}] 完成" + '\n')
sys.stdout.flush()
logger.debug("已完成对{self.torrent_count}个文件的检查")
logger.debug("已对其中{self.count}个文件进行重命名")
logger.debug("完成")
def rename_app(self):
if self.method not in ['pn', 'normal']:
print('error method')
logger.error('error method')
elif self.method == 'normal':
for i in range(0, self.torrent_count + 1):
try:
@@ -106,8 +105,7 @@ class QbittorrentRename:
def rename_main():
sys.stdout.write(f"[{time.strftime('%X')}] Program start." + '\n')
sys.stdout.flush()
logger.debug("Program start.")
while True:
rename = QbittorrentRename(method)
rename.rename_app()

View File

@@ -3,10 +3,13 @@ import io
import sys
import os.path as op
import time
import logging
import qbittorrentapi
import json
logger = logging.getLogger(__name__)
with open("config.json") as f:
server_info = json.load(f)
host_ip = "http://"+server_info['host_ip']
@@ -25,7 +28,6 @@ episode_rules = [r'(.*)\[(\d{1,3}|\d{1,3}\.\d{1,2})(?:v\d{1,2})?(?:END)?\](.*)',
r'(.*)- (\d{1,3}|\d{1,3}\.\d{1,2})(?:v\d{1,2})?(?:END)? (.*)']
# Suffixs of files we are going to rename
suffixs = ['mp4', 'mkv', 'avi', 'mov', 'flv', 'rmvb', 'ass', 'idx']
sys.stdout = io.TextIOWrapper(buffer=sys.stdout.buffer, encoding='utf8')
class QbittorrentRename:
@@ -34,7 +36,7 @@ class QbittorrentRename:
try:
self.qbt_client.auth_log_in()
except qbittorrentapi.LoginFailed as e:
print(e)
logger.exception(e)
self.recent_info = self.qbt_client.torrents_info(status_filter='completed')
self.hash = None
self.name = None
@@ -68,7 +70,7 @@ class QbittorrentRename:
def rename(self):
if self.path_name != self.new_name:
self.qbt_client.torrents_rename_file(torrent_hash=self.hash, old_path=self.path_name, new_path=self.new_name)
print(f"[{time.strftime('%X')}] {self.path_name} >> {self.new_name}")
logger.debug("{self.path_name} >> {self.new_name}")
self.count += 1
else:
return
@@ -79,13 +81,13 @@ class QbittorrentRename:
self.new_name = None
def print_result(self):
print(f"[{time.strftime('%X')}] 已完成对{self.torrent_count}个文件的检查")
print(f"[{time.strftime('%X')}] 已对其中{self.count}个文件进行重命名")
print(f"[{time.strftime('%X')}] 完成")
logger.debug(f"已完成对{self.torrent_count}个文件的检查")
logger.debug(f"已对其中{self.count}个文件进行重命名")
logger.debug(f"完成")
def rename_app(self):
if self.method not in ['pn', 'normal']:
print('error method')
logger.error('error method')
elif self.method == 'normal':
for i in range(0, self.torrent_count + 1):
try:

View File

@@ -3,6 +3,9 @@ import qbittorrentapi
import json
import argparse
import os
import logging
logger = logging.getLogger(__name__)
f = open("config.json")
server_info = json.load(f)
@@ -22,7 +25,7 @@ def rule_set():
try:
qbt_client.auth_log_in()
except qbittorrentapi.LoginFailed as e:
print(e)
logger.exception(e)
args = parser.parse_args()
bangumi_name = args.name
rule = {'enable': True,