mirror of
https://github.com/EstrellaXD/Auto_Bangumi.git
synced 2026-02-03 02:04:06 +08:00
修正目录
This commit is contained in:
@@ -20,4 +20,4 @@ coverage.xml
|
||||
.hypothesis
|
||||
|
||||
auto_bangumi/const_dev.py
|
||||
config/bangumi.json
|
||||
config/bangumi.json/config/bangumi.json
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -163,3 +163,4 @@ cython_debug/
|
||||
/auto_bangumi/const_dev.py
|
||||
/config/bangumi.json
|
||||
/auto_bangumi/tester.py
|
||||
!/source/names.txt
|
||||
|
||||
4
.idea/Bangumi_Auto_Rename.iml
generated
4
.idea/Bangumi_Auto_Rename.iml
generated
@@ -1,7 +1,9 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<sourceFolder url="file://$MODULE_DIR$/auto_bangumi" isTestSource="false" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="Python 3.10" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
|
||||
@@ -1,889 +0,0 @@
|
||||
import re
|
||||
import json
|
||||
import zhconv
|
||||
import logging
|
||||
from fliter_base import *
|
||||
|
||||
# Module logger: WARNING and above are written to a UTF-8 log file that is
# truncated (mode "w") every time this module is imported.
logger = logging.getLogger(__name__)
handler = logging.FileHandler(
    filename="RssFilter/rename_log.txt", mode="w", encoding="utf-8"
)
handler.setFormatter(
    logging.Formatter(
        "%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"
    )
)
# Use the documented setter instead of assigning ``logger.level`` directly, so
# the level is validated and the logging machinery is told about the change.
logger.setLevel(logging.WARNING)
logger.addHandler(handler)
|
||||
|
||||
# Lower-case markers that recognize_group() looks for in a release name.
# "group_character" holds generic words that tend to appear in group names,
# "group_char" holds specific group names; "all_charactor" is both, in that
# order. ("group_rule" is added at runtime by RSSInfoCleaner.__init__.)
const = dict(
    group_character=[
        "字幕社",
        "字幕组",
        "字幕屋",
        "发布组",
        "连载组",
        "动漫",
        "国漫",
        "汉化",
        "raw",
        "works",
        "工作室",
        "压制",
        "合成",
        "制作",
        "搬运",
        "委员会",
        "家族",
        "译制",
        "动画",
        "研究所",
        "sub",
        "翻译",
        "联盟",
        "dream",
        "-rip",
        "neo",
        "team",
        "百合组",
        "慕留人",
        "行动组",
    ],
    group_char=[
        "dmhy",
        "澄空学园",
        "c.c动漫",
        "vcb",
        "amor",
        "moozzi2",
        "skytree",
        "sweetsub",
        "pcsub",
        "ahu-sub",
        "f宅",
        "captions",
        "dragsterps",
        "onestar",
        "lolihouse",
        "天空树",
        "妇联奶子",
        "不够热",
        "烤肉同好",
        "卡通",
        "时雨初空",
        "nyaa",
        "ddd",
        "koten",
        "reinforce",
        "届恋对邦小队",
        "cxraw",
        "witex.io",
    ],
)
const["all_charactor"] = [*const["group_character"], *const["group_char"]]
|
||||
|
||||
class RSSInfoCleaner:
|
||||
class Name:
    """Holder for the title variants of one release name; all fields start as None."""

    def __init__(self) -> None:
        # raw: input file name, clean: name with tags stripped,
        # zh / en / jp: per-language titles, conv: reserved (never set here).
        self.raw = self.conv = self.zh = self.en = self.jp = self.clean = None
|
||||
|
||||
class Info:
    """Holder for release metadata (group, season, episode, version); all start as None."""

    def __init__(self) -> None:
        # "vision" is the attribute name used throughout the file for the version tag.
        self.group = self.season = self.episode = self.vision = None
|
||||
|
||||
class Tag:
    """Holder for technical tags parsed from a release name; all start as None."""

    def __init__(self) -> None:
        # dpi: resolution, ass: subtitle format, lang: language,
        # type: container, code: codecs, source: release source.
        self.dpi = self.ass = self.lang = None
        self.type = self.code = self.source = None
|
||||
|
||||
def __init__(self, file_name):
    """Parse *file_name* and fill ``self.name``, ``self.info`` and ``self.tag``.

    Side effects: reads ``RssFilter/clean_rule.json`` and stores its
    Simplified-Chinese-converted group names in ``const["group_rule"]``.
    """
    self.name = RSSInfoCleaner.Name()
    self.info = RSSInfoCleaner.Info()
    self.tag = RSSInfoCleaner.Tag()
    self.file_name = file_name
    self.name.raw = file_name  # input file name; clean() rewrites it in place
    self.clean()  # strip adverts and other noise
    # Load the hand-maintained group names used as strong matching rules
    with open("RssFilter/clean_rule.json", encoding="utf-8") as file_obj:
        rule_json = json.load(file_obj)[0]["group_name"]
    const["group_rule"] = [zhconv.convert(x, "zh-cn") for x in rule_json]
    self.file_info = {}

    self.pre_analyse = None
    # get_group() runs recognize_group() itself, so the extra call that used
    # to precede it only repeated the same work.
    self.info.group = self.get_group()
    self.tag.dpi = self.get_dpi()
    self.info.season = self.get_season()
    self.info.episode = self.get_episode()
    self.info.vision = self.get_vision()
    self.tag.lang = self.get_language()
    self.tag.ass = self.get_ass()
    self.tag.type = self.get_type()
    self.tag.code = self.get_code()
    self.tag.source = self.get_source()
    # Must run after every get_*(): it removes the values found above
    self.name.clean = self.get_clean_name()
    self.zh_list = []
    self.jp_list = []
    self.en_list = []
    self.get_title()
|
||||
|
||||
# 清理原链接(中文字符替换为英文)
|
||||
|
||||
def clean(self):
    """Normalise ``self.name.raw`` in place.

    Converts the name to Simplified Chinese, removes recruitment adverts,
    season/date stamps and a fixed list of noise strings, inserts ``/``
    between adjacent titles, and finally maps full-width punctuation to ASCII.

    NOTE(review): several character classes below contain the same ASCII
    character twice (``((``, ``))``, ``,,``, ``!!``). That looks like
    full-width variants lost in transit; confirm against repository history.
    """
    file_name = zhconv.convert(self.name.raw, "zh-cn")
    # Recruitment adverts
    file_name = re.sub(
        r"[((\[【]?(字幕)?[\u4e00-\u9fa5、]{0,3}(新人|招募?新?)[\u4e00-\u9fa5、]{0,8}[))\]】]?",
        "",
        file_name,
    )
    # "<year>年<month>月新番" style season stamps
    file_name = re.sub(
        r"[((\[【]?★?((网飞)?\d{4}年[春夏秋冬]?)?[\d一二三四五六七八九十]{1,2}月新?番?★?[))\]】]?",
        "",
        file_name,
    )
    # A bare 2xxx year delimited by brackets or spaces
    file_name = re.sub(r"[((\[【 ](2\d{3})[))\]】 ]", " ", file_name)
    # Numeric dates such as 2022.01.05
    file_name = re.sub(
        r"[((\[【]?((网飞)?2(\d{3}[年.][春夏秋冬]?)\d{1,2}\.?\d{1,2})[))\]】]?", "", file_name
    )
    # "检索..." search hints: everything from the opening bracket onwards
    file_name = re.sub(r"[((\[【]检索.*[))\]】]?", "", file_name)
    strip = [
        "特效歌词",
        "复制磁连",
        "兼容",
        "配音",
        "网盘",
        "\u200b",
        "[PSV&PC]",
        "Rv40",
        "R10",
        "Fin]",
        "Fin ",
        "[mkv]",
        "[]",
        "★",
        "☆",
    ]
    file_name = del_rules(file_name, strip)
    # xx_xx_xx: turn underscores in a run of Latin words into spaces, add "/"
    f_res = re.search(r"]?(([a-zA-Z:.。,,!!]{1,10})[_\[ ]){2,}", file_name)
    if f_res is not None:
        file_name = file_name.replace(
            f_res.group(), "%s/" % f_res.group().replace("_", " ")
        )
    # Chinese_English_: isolate the underscore-delimited Latin title with "/"
    f_res = re.search(r"_[a-zA-Z_ \-·、.。,!!]*[_))\]】]", file_name)
    if f_res is not None:
        file_name = file_name.replace(
            f_res.group(), "/%s/" % f_res.group().strip("_")
        )
    # Japanese.English: replace the joining "." with "/"
    f_res = re.search(
        r"([\u4e00-\u9fa5\u3040-\u31ff\d:\-·、.。,!!]{1,20}\.)([a-zA-Z\d:\-.。,,!!]{1,20} ?){2,}",
        file_name,
    )
    if f_res is not None:
        file_name = file_name.replace(
            f_res.group(1), "%s/" % f_res.group(1).strip(".")
        )

    # Full-width punctuation -> ASCII. The source characters are spelled as
    # escapes because the previous literals were identical to their
    # replacements, which made these calls no-ops.
    # NOTE(review): the code points are inferred from the ASCII targets
    # (U+FF0D for "-" in particular) - confirm against the original file.
    self.name.raw = (
        str(file_name)
        .replace("\uff1a", ":")
        .replace("【", "[")
        .replace("】", "]")
        .replace("\uff0d", "-")
        .replace("\uff08", "(")
        .replace("\uff09", ")")
        .replace("\uff06", "&")
        .replace("X", "x")
        .replace("×", "x")
        .replace("Ⅹ", "x")
        .replace("__", "/")
    )
|
||||
|
||||
# 检索字幕组特征
|
||||
def recognize_group(self):
    """Locate the release-group marker and report how it was found.

    Stores the detected text (or ``None``) in ``self.pre_analyse`` and returns:

    * ``"enforce"`` - a curated name from ``const["group_rule"]`` is present
    * ``"success"`` - name starts with ``[`` and a generic marker is in the
      first, second or last ``]``-separated segment
    * ``"special"`` - name starts with ``[`` and the first segment contains ``&``
    * ``"reserve"`` - a marker is in the text after the last ``-``
    * ``"blank"``   - a marker is in the first space-separated word
    * ``False``     - nothing matched
    """
    rule = const["group_rule"]
    character = const["all_charactor"]
    # Strong rules: curated, case-sensitive names take priority
    for char in rule:
        if ("&" + char) in self.file_name or (char + "&") in self.file_name:
            # Escape the name: entries such as "a+b" or "x.io" are literal
            # text, and unescaped they could fail to match and crash on .group()
            escaped = re.escape(char)
            found = re.search(
                r"[((\[【]?(.*?(&%s|%s&).*?)[))\]】]?" % (escaped, escaped),
                self.file_name,
            )
            self.pre_analyse = found.group(1).lower()
            return "enforce"
        if char in self.file_name:
            self.pre_analyse = char.lower()
            return "enforce"

    raw_lower = self.name.raw.lower()
    # Name starts with "[group]" (startswith also copes with an empty name)
    if self.name.raw.startswith("["):
        str_split = raw_lower.split("]")
        # First, second and last segment; slicing tolerates a missing "]"
        segments = str_split[:2] + str_split[-1:]
        for char in character:
            if any(char in segment for segment in segments):
                self.pre_analyse = char
                return "success"
        # "[groupA&groupB&groupC]" style collaboration
        if "&" in str_split[0]:
            head = str_split[0][1:]
            # Cap the length so a long first segment is not taken as a group
            self.pre_analyse = head if len(head) < 15 else None
            return "special"
    # Name ends with "-group"
    elif "-" in self.name.raw:
        tail = raw_lower.split("-")[-1]
        if any(char in tail for char in character):
            self.pre_analyse = tail
            return "reserve"
    # Space separated, group is the first word
    else:
        first_str = raw_lower.split(" ")[0]
        if any(char in first_str for char in character):
            self.pre_analyse = first_str
            return "blank"
    self.pre_analyse = None
    return False
|
||||
|
||||
# 获取字幕组名
|
||||
def get_group(self):
    """Return the release-group name, or ``None`` when it cannot be determined."""
    # How recognize_group() matched, and the marker it left behind
    outcome = self.recognize_group()
    marker = self.pre_analyse
    # For these four outcomes the stored marker already is the answer
    if outcome in ["enforce", "special", "reserve", "blank"]:
        return marker
    if outcome != "success":
        return None
    lowered = self.name.raw.lower()
    # Plain "[marker]" form: nothing more to extract
    if "[%s]" % marker in lowered:
        return marker
    if self.name.raw[0] == "[":
        try:
            # Take the bracketed text that surrounds the marker
            return get_gp(marker, lowered)
        except Exception as e:
            logger.warning(
                "bug -- res_char:%s,%s,%s"
                % (marker, lowered, e)
            )
    return None
|
||||
|
||||
# 扒了6W数据,硬找的参数,没啥说的
|
||||
def get_dpi(self):
    """Return the first known resolution tag in the name as ``[tag]``, else ``None``.

    The comparison is case-insensitive and the returned tag is lower-case.
    Tags are tried in table order, so earlier entries win.
    """
    file_name = str(self.name.raw).lower()
    # All entries are lower-case because they are compared against the
    # lower-cased name; mixed-case entries could never match.
    dpi_list = (
        "4k",
        "2160p",
        "1440p",
        "1080p",
        "1036p",
        "816p",
        "810p",
        "720p",
        "576p",
        "544p",
        "540p",
        "480p",
        "1080i",
        "1080+",
        "360p",
        "3840x2160",
        "1920x1080",
        "1920x1036",
        "1920x804",
        "1920x800",
        "1536x864",
        "1452x1080",
        "1440x1080",
        "1280x720",
        "1272x720",
        "1255x940",
        "1024x768",
        "1024x576",
        "960x720",
        "948x720",
        "896x672",
        "872x480",
        "848x480",
        "832x624",
        "704x528",
        "640x480",
        "mp4_1080",
        "mp4_720",
    )
    for tag in dpi_list:
        # Substring test also accepts a tag at index 0, which "find() > 0" missed
        if tag in file_name:
            return [tag]
    return None
|
||||
|
||||
# 获取语种
|
||||
def get_language(self):
    """Return the language tags found in the name as a list, or ``None``.

    Three independent patterns are tried and their hits concatenated. A
    pattern that does not match is simply skipped rather than raising.
    """
    file_name = str(self.name.raw)
    lang = []
    # Chinese markers, e.g. "[简繁..."
    found = re.search(
        r"[((\[【 ]((tvb)?([粤简繁英俄][日中文体&/]?[_&]?){1,5})[))\]】]?",
        file_name,
    )
    if found is not None:
        lang.append(found.group(1).strip(" "))
    # Chinese markers followed by 语 / 字幕 (optionally 双)
    found = re.search(r"[((\[【]?[粤中简繁英俄日文体](双?(语|字幕))[))\]】]?", file_name)
    if found is not None:
        lang.append(found.group(1).strip(" "))
    # Latin markers (case-sensitive); lower-cased and split on spaces
    found = re.search(
        r"[((\[【]?(((G?BIG5|CHT|CHS|GB|JPN?|CN)[/ _]?){1,3})[))\]】]?",
        file_name,
    )
    if found is not None:
        lang.extend(found.group(1).lower().strip(" ").split(" "))
    return lang or None
|
||||
|
||||
# 文件种类
|
||||
def get_type(self):
    """Return the container tag (mp4 / mkv / mp3) as a one-item list, or ``None``."""
    file_name = str(self.name.raw).lower()
    found = re.search(
        r"[((\[【]?(((mp4|mkv|mp3)[ -]?){1,3})[))\]】]?",
        file_name,
    )
    # No tag is the common case, so it is tested for rather than caught
    if found is None:
        return None
    return [found.group(1).strip(" ")]
|
||||
|
||||
# 编码格式
|
||||
def get_code(self):
    """Return video codec, bit depth and audio codec tags as a list, or ``None``.

    Each pattern is searched independently in the lower-cased name. Captured
    text is split on spaces, so a capture with a leading space also yields
    an empty string.
    """
    file_name = str(self.name.raw).lower()
    code = []
    # Video codec
    video = re.search(
        r"[((\[【]?([ _-]?([xh]26[45]|hevc|avc)){1,5}[ ))\]】]?",
        file_name,
    )
    if video is not None:
        code.extend(video.group(1).split(" "))
    # Bit depth
    depth = re.search(r"[((\[【]?[ _-]?((10|8)[ -]?bit)[ ))\]】]?", file_name)
    if depth is not None:
        code.extend(depth.group(1).split(" "))
    # Audio codec; group 3 is the codec of the last repetition
    audio = re.search(
        r"[((\[【]?(([ _-]?((flac(x\d)?|aac|mp3|opus)(x\d)?)){1,5})[ ))\]】]?",
        file_name,
    )
    if audio is not None:
        code.extend(audio.group(3).split(" "))
    return code or None
|
||||
|
||||
# 来源
|
||||
def get_source(self):
    """Return up to three distinct source tags (bd, webrip, ...) or ``None``.

    After each hit the tags found so far are removed from a working copy of
    the lower-cased name so the next search can reach the following tag.
    """
    file_name = str(self.name.raw).lower()
    type_list = []
    pattern = re.compile(
        r"[((\[【]?((bd|dvd|hd|remux|(viu)?tvb?|ani-one|bilibili|网飞(动漫)|b-?global|baha|web[ /-]?(dl|rip))[ -]?(b[o0]x|iso|mut|rip)?)[))\]】]?"
    )
    for _ in range(3):
        found = pattern.search(file_name)
        if found is None:
            # The working copy no longer changes, so later rounds cannot match
            break
        res = found.group(1).lower().strip(" ")
        if res not in type_list:
            type_list.append(res)
        for known in type_list:
            file_name = file_name.replace(known, "")
    return type_list or None
|
||||
|
||||
# 获取季度
|
||||
def get_season(self):
    """Return season markers found in the lower-cased name as a list, or ``None``.

    A Chinese-style marker (e.g. "第二季") and a Latin-style marker
    (e.g. "season 2", "s2") are searched independently; both may be returned.
    """
    file_name = str(self.name.raw.lower())
    season = []
    patterns = (
        # Chinese markers
        r" ?(第?(\d{1,2}|[一二三])(部|季|季度|丁目))",
        # Latin markers
        r"((final ?)?(season|[ \[]s) ?\d{1,2}|\d{1,2}-?choume)",
    )
    for pattern in patterns:
        found = re.search(pattern, file_name)
        if found is not None:
            season.append(found.group(1).strip(" "))
    return season or None
|
||||
|
||||
# 获取集数
|
||||
def get_episode(self):
    """Return the episode marker as a one-item list, or ``None``.

    Patterns are tried in order on the lower-cased name and the first hit
    wins. The text returned is capture group 1 of the matching pattern.
    """
    file_name = str(self.name.raw.lower())
    patterns = (
        # "_NN集" and "NN-NN" ranges
        r"(_((\d+集-)?\d+集)|[ (\[第]\d+-\d+ ?)",
        # "[10 11]" style pairs
        r"[\[( ](\d{1,3}[- &_]\d{1,3}) ?(fin| Fin|\(全集\))?[ )\]]",
        # OVA, movie and collection markers without an episode number
        r"[\[ 第](_\d{1,3}集|ova|剧场版|全|OVA ?\d{0,2}|合|[一二三四五六七八九十])[集话章 \]\[]",
        # Standard single episode, optionally prefixed with sp
        r"[\[ 第e]((sp|(数码)?重映)?(1?\d{1,3}(\.\d)?|1?\d{1,3}\(1?\d{1,3}\)))(v\d)?[集话章 \]\[]",
    )
    for pattern in patterns:
        found = re.search(pattern, file_name)
        if found is not None:
            return [found.group(1)]
    # "NN NN话" / "NN&NN全集": group 1 is an optional prefix and is None when
    # absent, so fall back to the numeric range instead of returning [None].
    found = re.search(
        r"[\[ 第(]((合集)?\\)?(\d{1,3}[ &]\d{1,3})(话| |]|\(全集\)|全集|fin)",
        file_name,
    )
    if found is not None:
        return [found.group(1) or found.group(3)]
    return None
|
||||
|
||||
# 获取版本
|
||||
def get_vision(self):
    """Return version markers (e.g. "v2", "修正版") as a list, or ``None``.

    All three patterns are searched in the lower-cased name and every hit is
    kept, in pattern order.
    """
    file_name = str(self.name.raw.lower())
    patterns = (
        # Chinese wording.
        # NOTE(review): the "WEB限定" alternative is upper-case while the
        # name is lower-cased, so it looks unreachable - confirm intent.
        r"[((\[【]?(([\u4e00-\u9fa5]{0,5}|v\d)((版本?|修[复正]|WEB限定)|片源?|内详|(特别篇))(话|版|合?集?))[))\]】]?",
        # Episode number followed by vN, e.g. "[05v2]"
        r"[((\[【 ]\d{1,2}((v\d)((版本?|修复?正?版)|片源?|内详)?)[))\]】]",
        # Bare "[v2]"
        r"[((\[【 ](v\d)[))\]】]",
    )
    vision = []
    for pattern in patterns:
        found = re.search(pattern, file_name)
        if found is not None:
            vision.append(found.group(1))
    return vision or None
|
||||
|
||||
# 获取字幕类型
|
||||
def get_ass(self):
    """Return subtitle-type markers (e.g. "内封", "ass") as a list, or ``None``."""
    file_name = str(self.name.raw.lower())
    ass = []
    # Chinese markers: embedded / external subtitles or audio tracks
    found = re.search(
        r"[((\[【]?(附?([内外][挂嵌封][+&]?){1,2}(字幕|[简中日英]*音轨)?)[))\]】]?",
        file_name,
    )
    if found is not None:
        ass.append(found.group(1))
    # Latin markers: subtitle file formats
    found = re.search(
        r"[ ((\[【+](([ +]?(ass|pgs|srt)){1,3})[))\]】]?", file_name
    )
    if found is not None:
        ass.append(found.group(1).strip(" "))
    return ass or None
|
||||
|
||||
# 对以/分隔的多个翻译名,进行简单提取
|
||||
def easy_split(self, clean_name, zh_list, en_list, jp_list):
|
||||
if "/" in clean_name:
|
||||
n_list = clean_name.split("/")
|
||||
for k_i in n_list:
|
||||
if has_jp(k_i):
|
||||
jp_list.append(k_i.strip(" "))
|
||||
else:
|
||||
if has_zh(k_i) is False:
|
||||
en_list.append(k_i.strip(" "))
|
||||
elif has_en(k_i) is False:
|
||||
zh_list.append(k_i.strip(" "))
|
||||
elif has_zh(k_i) and has_en(k_i):
|
||||
# 如果还是同时包含中英文的情况,递龟一下
|
||||
if " " not in k_i:
|
||||
res = re.search(k_i, self.name.raw.lower())
|
||||
if res is not None:
|
||||
zh_list.append(res.group())
|
||||
else:
|
||||
k_i = add_separator(k_i)
|
||||
self.easy_split(k_i, zh_list, en_list, jp_list)
|
||||
else:
|
||||
self.easy_split(k_i, zh_list, en_list, jp_list)
|
||||
else:
|
||||
k_list = clean_name.split(" ")
|
||||
for k_i in k_list:
|
||||
if has_jp(k_i):
|
||||
jp_list.append(k_i.strip(" "))
|
||||
else:
|
||||
if has_zh(k_i) is False:
|
||||
en_list.append(k_i.strip(" "))
|
||||
elif has_en(k_i) is False:
|
||||
zh_list.append(k_i.strip(" "))
|
||||
elif has_zh(k_i) and has_en(k_i):
|
||||
res = re.search(k_i, self.name.raw.lower())
|
||||
if res is not None:
|
||||
zh_list.append(res.group())
|
||||
|
||||
# 混合验证
|
||||
def all_verity(self, raw_name):
    """Re-validate the zh / en / jp candidate lists against *raw_name*.

    Each list that is not ``None`` is replaced by the result of
    ``re_verity(list, raw_name)``; ``None`` lists stay ``None``.
    """
    for attr in ("zh_list", "en_list", "jp_list"):
        candidates = getattr(self, attr)
        checked = None if candidates is None else re_verity(candidates, raw_name)
        setattr(self, attr, checked)
|
||||
|
||||
# 汇总信息
|
||||
def get_clean_name(self):
    """Return the lower-cased raw name with every recognised tag removed.

    What remains is the title text, with ``/`` between separate titles.
    """
    # Everything in lower case
    clean_name = self.name.raw.lower()

    # Values already extracted, in removal order
    extracted = (
        self.info.group,
        self.tag.dpi,
        self.info.season,
        self.info.episode,
        self.info.vision,
        self.tag.lang,
        self.tag.ass,
        self.tag.type,
        self.tag.code,
        self.tag.source,
    )
    for value in extracted:
        if value is None:
            continue
        if type(value) is list:
            for piece in value:
                if piece is not None:
                    clean_name = clean_name.replace(piece, "")
        else:
            clean_name = clean_name.replace(value, "")

    # Fixed noise strings
    for noise in (
        "pc&psp",
        "pc&psv",
        "movie",
        "bangumi.online",
        "donghua",
        "[_]",
        "仅限港澳台地区",
        "话全",
        "第话",
        "第集",
        "全集",
        "字幕",
        "话",
        "集",
        "粤",
        "+",
        "@",
    ):
        clean_name = clean_name.replace(noise, "")
    # Collapse runs of spaces, then trim spaces and hyphens
    clean_name = re.sub(" +", " ", clean_name).strip(" ").strip("-").strip(" ")
    # Adjacent bracket groups become a title separator (marked "must keep")
    clean_name = clean_name.replace("][", "/")
    # NOTE(review): "\a-z" in the class below is a range from BEL (0x07) to
    # "z", not "a-z"; kept unchanged because results depend on it.
    boundary = re.search(
        "[\u4e00-\u9fa5\u3040-\u31ff ]([(\[。_])[\u4e00-\u9fa5\a-z]", clean_name
    )
    if boundary is not None:
        clean_name = clean_name.replace(boundary.group(1), "/")
    # Drop remaining brackets together with the spaces hugging them
    clean_name = re.sub("([(\[] *| *[)\]])", "", clean_name)

    clean_name = re.sub("(/ */)", "/", clean_name)
    clean_name = re.sub(" +- +", "/", clean_name)
    return clean_name.strip("_").strip("/").strip(" ")
|
||||
|
||||
# 提取标题
|
||||
def get_title(self):
|
||||
self.name.zh, self.name.en, self.name.jp = None, None, None
|
||||
# 国漫筛选
|
||||
if "国漫" in self.name.raw:
|
||||
zh = re.search(
|
||||
"-?([\u4e00-\u9fa5]{2,10})_?", self.name.raw.replace("[国漫]", "")
|
||||
)
|
||||
if zh is not None:
|
||||
self.name.zh = clean_list([zh.group()])
|
||||
return
|
||||
if "/" not in self.name.clean:
|
||||
if has_jp(self.name.clean) is False:
|
||||
if has_zh(self.name.clean) is False:
|
||||
en = re.search(self.name.clean, self.name.raw.lower())
|
||||
if en is not None:
|
||||
self.name.en = clean_list([en.group()])
|
||||
return
|
||||
elif (
|
||||
re.search(
|
||||
"(^[\u4e00-\u9fa5\u3040-\u31ff\d:\-·??、.。,!]{1,20}[a-z\d]{,3} ?!?)([a-z\d:\-.。,,!! ]* ?)",
|
||||
self.name.clean,
|
||||
)
|
||||
is not None
|
||||
):
|
||||
res = re.search(
|
||||
"(^[\u4e00-\u9fa5\u3040-\u31ff\d:\-·??、.。,!]{1,20}[a-z\d]{,3} ?!?)[._&]?([a-z\d:\-.。,,!! ]* ?)",
|
||||
self.name.clean,
|
||||
)
|
||||
zh = res.group(1)
|
||||
en = res.group(2)
|
||||
zh = re.search(zh, self.name.raw.lower())
|
||||
if zh is not None:
|
||||
self.name.zh = clean_list([zh.group()])
|
||||
en = re.search(en, self.name.raw.lower())
|
||||
if en is not None:
|
||||
self.name.en = clean_list([en.group()])
|
||||
return
|
||||
# 英中
|
||||
elif (
|
||||
re.search(
|
||||
"(^([a-z\d:\-_.。,,!! ]* ?) ?)[._&]?([\u4e00-\u9fa5\u3040-\u31ffa-z\d:\-_·??、.。,!! ]{1,20})",
|
||||
self.name.clean,
|
||||
)
|
||||
is not None
|
||||
):
|
||||
res = re.search(
|
||||
"(^([a-z\d:\-_.。,,!! ]* ?) ?)[._&]?([\u4e00-\u9fa5\u3040-\u31ffa-z\d:\-_·??、.。,!! ]{1,20})",
|
||||
self.name.clean,
|
||||
)
|
||||
|
||||
zh = res.group(3)
|
||||
en = res.group(1)
|
||||
zh = re.search(zh, self.name.raw.lower())
|
||||
if zh is not None:
|
||||
self.name.zh = clean_list([zh.group()])
|
||||
en = re.search(en, self.name.raw.lower())
|
||||
if en is not None:
|
||||
self.name.en = clean_list([en.group()])
|
||||
return
|
||||
elif len(re.findall("[a-zA-Z]", self.name.clean.lower())) < 10:
|
||||
zh = re.search(self.name.clean, self.name.raw.lower())
|
||||
if zh is not None:
|
||||
self.name.zh = clean_list([zh.group()])
|
||||
return
|
||||
if debug > 0:
|
||||
print("初筛:\r\n%s\r\n%s\r\n%s" % (self.zh_list, self.en_list, self.jp_list))
|
||||
if (has_zh(self.name.clean) or has_jp(self.name.clean)) and has_en(
|
||||
self.name.clean
|
||||
):
|
||||
self.name.clean = add_separator(self.name.clean)
|
||||
self.easy_split(self.name.clean, self.zh_list, self.en_list, self.jp_list)
|
||||
|
||||
if debug > 0:
|
||||
print("二筛:\r\n%s\r\n%s\r\n%s" % (self.zh_list, self.en_list, self.jp_list))
|
||||
# 结果反代入原名验证
|
||||
self.all_verity([self.name.raw, self.name.clean])
|
||||
|
||||
# 去除正确结果后,重新识别其他部分
|
||||
if self.jp_list:
|
||||
temp_name = del_rules(self.name.clean, self.jp_list)
|
||||
self.easy_split(temp_name, self.zh_list, self.en_list, self.jp_list)
|
||||
if self.zh_list and self.en_list == []:
|
||||
temp_name = del_rules(self.name.clean, self.zh_list)
|
||||
self.easy_split(temp_name, self.zh_list, self.en_list, self.jp_list)
|
||||
elif self.zh_list == [] and self.en_list:
|
||||
temp_name = del_rules(self.name.clean, self.en_list)
|
||||
self.easy_split(temp_name, self.zh_list, self.en_list, self.jp_list)
|
||||
while "" in self.en_list:
|
||||
self.en_list.remove("")
|
||||
if debug > 0:
|
||||
print("三筛:\r\n%s\r\n%s\r\n%s" % (self.zh_list, self.en_list, self.jp_list))
|
||||
# 一步一验
|
||||
self.all_verity([self.name.raw, self.name.clean])
|
||||
for _ in range(5):
|
||||
# 拼合碎片
|
||||
splicing(self.zh_list, self.zh_list, self.name.clean)
|
||||
splicing(self.en_list, self.en_list, self.name.clean)
|
||||
splicing(self.jp_list, self.jp_list, self.name.clean)
|
||||
try:
|
||||
# 拼合中英文碎片
|
||||
for i in self.en_list:
|
||||
for j in self.zh_list:
|
||||
res = re.search("%s +%s" % (i, j), self.name.raw.lower())
|
||||
if res is not None:
|
||||
self.en_list.remove(i)
|
||||
self.zh_list.append(res.group())
|
||||
except Exception as e:
|
||||
logger.info(e)
|
||||
if debug > 0:
|
||||
print("拼合:\r\n%s\r\n%s\r\n%s" % (self.zh_list, self.en_list, self.jp_list))
|
||||
# 再次验证,这里只能验raw名
|
||||
self.all_verity(self.name.raw)
|
||||
# 灌装
|
||||
self.name.zh = clean_list(self.zh_list)
|
||||
bug_list = ["不白吃话山海经"]
|
||||
for i in bug_list:
|
||||
if i in self.name.raw.lower():
|
||||
if has_zh(i):
|
||||
self.name.zh = [i]
|
||||
self.name.en = clean_list(self.en_list)
|
||||
self.name.jp = clean_list(self.jp_list)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Module-level flag read by RSSInfoCleaner.get_title(): 0 is quiet, a
    # value above 1 is also used as the sample row to start from.
    debug = 0
    # Sample data comes from mikan or dmhy (dmhy up to 10k rows, mikan up to 30k)
    # Start index and number of rows to read
    num = debug if debug > 1 else 800
    row = 1 if debug else 200
    name_list = read_data("mikan", num, row)
    for index, file_name in enumerate(name_list, start=num):
        info = RSSInfoCleaner(file_name)
        # Parsed titles live on the instance attribute ``name``; ``info.Name``
        # is the nested class itself and has no ``raw`` / ``clean`` fields.
        title = info.name
        print("%s:%s" % (index, file_name))
        print("raw_name:%s" % title.raw)
        print("clean_name:%s" % title.clean)
        print("zh:%s" % title.zh)
        print("en:%s" % title.en)
        print("jp:%s" % title.jp)
        print()
|
||||
@@ -1,74 +0,0 @@
|
||||
[
|
||||
{
|
||||
"group_name": [
|
||||
"Lilith-Raws x WitEx.io",
|
||||
"极影字幕社+辉夜汉化组",
|
||||
"Lilith-Raws",
|
||||
"NC-Raws",
|
||||
"Skymoon-Raws",
|
||||
"天月搬运组",
|
||||
"肥猫压制",
|
||||
"LoliHouse",
|
||||
"猎户不鸽发布组",
|
||||
"NaN-Raws",
|
||||
"猎户随缘发布组",
|
||||
"桜都字幕组",
|
||||
"澄空学园&雪飘工作室",
|
||||
"千夏字幕组",
|
||||
"百冬练习组",
|
||||
"IET字幕组",
|
||||
"离谱Sub",
|
||||
"酷漫404",
|
||||
"星空字幕组",
|
||||
"轻之国度字幕组",
|
||||
"枫叶字幕组",
|
||||
"雪飘工作室",
|
||||
"豌豆字幕组",
|
||||
"云光字幕组",
|
||||
"悠哈璃羽字幕社",
|
||||
"桜都字幕组",
|
||||
"ANi",
|
||||
"❀拨雪寻春❀",
|
||||
"极彩字幕组",
|
||||
"悠哈璃羽字幕社",
|
||||
"爱恋&漫猫字幕组",
|
||||
"MingY",
|
||||
"VCB-Studio",
|
||||
"喵萌奶茶屋",
|
||||
"爱恋字母社",
|
||||
"诸神字幕组",
|
||||
"驯兽师联盟",
|
||||
"夏沐字幕组",
|
||||
"动漫国字幕组",
|
||||
"百冬练习组s",
|
||||
"SweetSub&圆环记录攻略组",
|
||||
"动漫萌",
|
||||
"极影字幕社",
|
||||
"喵萌Production",
|
||||
"喵萌Production&LoliHouse",
|
||||
"60yrs ago",
|
||||
"50yrs ago",
|
||||
"41yrs ago",
|
||||
"40yrs ago",
|
||||
"30yrs ago",
|
||||
"20yrs ago",
|
||||
"s5291s",
|
||||
"xyx98",
|
||||
"nvacg",
|
||||
"RHxDymy",
|
||||
"PoInSu",
|
||||
"EMe",
|
||||
"DHR百合組",
|
||||
"雪飘工作室",
|
||||
"喵萌美食殿",
|
||||
"GalaxyRailroad-888",
|
||||
"APTX4869",
|
||||
"SummerHuo&ZJCONAN",
|
||||
"nyaa",
|
||||
"AngelEcho",
|
||||
"逆时针环游",
|
||||
"MCE汉化组"
|
||||
],
|
||||
"name_position": 1
|
||||
}
|
||||
]
|
||||
File diff suppressed because one or more lines are too long
@@ -1,225 +0,0 @@
|
||||
import re
|
||||
import logging
|
||||
|
||||
import csv
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def read_data(file_name, start, rows):
    """Return *rows* release names from a sample CSV, beginning at *start*.

    ``"mikan"`` reads column 3 of ``RssFilter/mikan.csv``; ``"dmhy"`` reads
    column 4 of ``RssFilter/dmhy.csv`` and skips one extra leading row.
    Any other *file_name* returns ``None``.
    """
    if file_name == "mikan":
        path, column, first = 'RssFilter/mikan.csv', 3, start
    elif file_name == "dmhy":
        path, column, first = 'RssFilter/dmhy.csv', 4, start + 1
    else:
        return None
    with open(path, 'r', encoding='utf-8') as csv_file:
        names = [record[column] for record in csv.reader(csv_file)]
    return names[first:first + rows]
|
||||
|
||||
|
||||
# 以 / 代替空格分隔中英文名
|
||||
def add_separator(clean_name):
    """Insert ``/`` between a leading title and the differently-scripted title after it.

    If the name starts with a CJK character, the leading CJK part is expected
    to be followed by at least two Latin words; otherwise the leading Latin
    words are expected to be followed by a CJK part. When the pattern matches,
    the leading part has its spaces trimmed and ``/`` appended. ``/ /`` runs
    are always collapsed to ``/``. A name that does not match is returned
    otherwise unchanged.
    """
    # Empty input and "no match" are ordinary outcomes, so they are tested
    # for instead of being raised and logged with a traceback.
    if clean_name:
        # NOTE(review): "[ -_]" below is a range from space to underscore,
        # not the three literal characters; kept as is pending confirmation.
        if '\u4e00' <= clean_name[0] <= '\u9fff':
            pattern = r"(^[\u4e00-\u9fa5\u3040-\u31ff\d: \-·、.。,!!]{1,20}[ -_]{1,5})([a-z\d:\-.。,,!!]{1,20} ?){2,}"
        else:
            pattern = r"^(([a-z\d:\-.。,,!!]{1,20} ?){2,}[ -_]{1,5})[\u4e00-\u9fa5\u3040-\u31ff\d: \-·、.。,,!!]{1,20}"
        found = re.search(pattern, clean_name)
        if found is not None:
            head = found.group(1)
            clean_name = clean_name.replace(head, head.strip(" ") + "/")
    return re.sub("(/ */)", "/", clean_name)
|
||||
|
||||
|
||||
# 拼合碎片
|
||||
def splicing(frag_list, name_list, raw_name):
    """Merge title fragments that sit next to each other in *raw_name*.

    Both lists are modified in place (callers in this file pass the same list
    for both). First, an entry that is a proper substring of its neighbour is
    dropped. Then, shortest first, a fragment and another entry that appear
    side by side in the lower-cased *raw_name* (up to 3 / 5 spaces apart) are
    replaced by the combined text.
    """
    try:
        for i in range(0, len(name_list) - 1):
            # Removals shorten the list while the index keeps advancing
            if i + 1 >= len(name_list):
                break
            if name_list[i] in name_list[i + 1] and name_list[i] != name_list[i + 1]:
                name_list.remove(name_list[i])
            elif name_list[i + 1] in name_list[i] and name_list[i] != name_list[i + 1]:
                name_list.remove(name_list[i + 1])
    except Exception as e:
        logger.info(e)
    min_list = sorted(name_list, key=len)
    for i in range(0, len(min_list) - 1):
        # Only meaningful while there is more than one fragment to combine
        if frag_list is not None and len(frag_list) > 1:
            fragment = min_list[i]
            try:
                if fragment in raw_name.lower():
                    # NOTE: name_list is mutated while being iterated; this
                    # mirrors the long-standing behaviour and is kept on purpose.
                    for piece_name in name_list:
                        try:
                            # Fragments are literal text: escape them so "+",
                            # "." or "(" cannot break or widen the pattern
                            frag_re = re.escape(fragment)
                            piece_re = re.escape(piece_name)
                            r_name = re.search(
                                "(%s {0,3}%s|%s {0,5}%s)" % (frag_re, piece_re, piece_re, frag_re),
                                raw_name.lower())
                            if r_name is not None:
                                frag_list.remove(fragment)
                                name_list.remove(piece_name)
                                name_list.append(r_name.group())
                        except Exception as e:
                            logger.warning("bug--%s" % e)
                            logger.warning("piece_name:%s,fragment:%s" % (piece_name, fragment))
            except Exception as e:
                logger.exception(e)
|
||||
|
||||
|
||||
# 清理列表
|
||||
def clean_list(raw_list):
    """Return the de-fragmented entries of *raw_list* as a set, or ``None``.

    Entries of length 1 or less are dropped and the rest are stripped of
    surrounding ``-`` and spaces. Then, repeatedly, an entry that is a proper
    substring of its neighbour, or of the longest entry, is removed.
    ``None`` is returned for ``None`` input or when nothing is left.
    """
    if raw_list is None:
        return None
    # Drop one-character fragments, trim the rest
    raw_list = [x.strip("-").strip(" ") for x in raw_list if len(x) > 1]
    # One round per original entry; removals are explicit bounds checks
    # rather than IndexErrors caught after mutating the list.
    for _ in range(len(raw_list)):
        if len(raw_list) <= 1:
            break
        # Neighbour pass: drop the entry contained in the one next to it
        for i in range(len(raw_list) - 1):
            if i + 1 >= len(raw_list):
                break
            left, right = raw_list[i], raw_list[i + 1]
            if left == right:
                continue
            if left in right:
                raw_list.remove(left)
            elif right in left:
                raw_list.remove(right)
        # Longest pass: drop entries contained in the longest one
        if len(raw_list) > 1:
            for i in range(len(raw_list)):
                up_list = sorted(raw_list, key=len)
                if i >= len(up_list):
                    break
                if up_list[i] in up_list[-1] and up_list[i] != up_list[-1]:
                    raw_list.remove(up_list[i])
    return set(raw_list) if raw_list else None
|
||||
|
||||
|
||||
# 粗略识别失败,re强制匹配
|
||||
def extract_title(raw_name):
|
||||
title = {
|
||||
"zh": None,
|
||||
"en": None,
|
||||
}
|
||||
clean_name = raw_name
|
||||
|
||||
if has_en(clean_name) and has_zh(clean_name):
|
||||
# 中英
|
||||
try:
|
||||
res = re.search("(([\u4e00-\u9fa5]{2,12}[ /:]{0,3}){1,5}) {0,5}(( ?[a-z':]{1,15}){1,15})", clean_name)
|
||||
title["zh"] = res.group(1).strip(" ")
|
||||
title["en"] = res.group(3).strip(" ")
|
||||
except Exception as e:
|
||||
logger.info(e)
|
||||
# 本程序依赖此bug运行,这行不能删
|
||||
if title["zh"] is None:
|
||||
# 中英
|
||||
try:
|
||||
res = re.search(
|
||||
"(([\u4e00-\u9fa5a]{1,12}[ /:]{0,3}){1,5})[&/ (]{0,5}(( ?[a-z':]{1,15}){1,15})[ )/]{0,3}",
|
||||
clean_name)
|
||||
title["zh"] = res.group(1).strip(" ")
|
||||
title["en"] = res.group(3).strip(" ")
|
||||
except Exception as e:
|
||||
logger.info(e)
|
||||
# 英中
|
||||
try:
|
||||
res = re.search(
|
||||
"(([ a-z'.:]{1,20}){1,8})[&/ (]{0,5}(([\u4e00-\u9fa5a]{2,10}[a-z]{0,3} ?){1,5})[ )/]{0,3}",
|
||||
clean_name)
|
||||
title["en"] = res.group(1).strip(" ")
|
||||
title["zh"] = res.group(3).strip(" ")
|
||||
except Exception as e:
|
||||
logger.info(e)
|
||||
else:
|
||||
if has_zh(clean_name):
|
||||
# 中文
|
||||
try:
|
||||
res = re.search("(([\u4e00-\u9fa5:]{2,15}[ /]?){1,5}) *", clean_name)
|
||||
title["zh"] = res.group(1).strip(" ")
|
||||
except Exception as e:
|
||||
logger.info(e)
|
||||
elif has_en(clean_name):
|
||||
# 英文
|
||||
try:
|
||||
res = re.search("(([a-z:]{2,15}[ /]?){1,15}) *", clean_name)
|
||||
title["en"] = res.group(1).strip(" ")
|
||||
except Exception as e:
|
||||
logger.info(e)
|
||||
for k, v in title.items():
|
||||
if v is not None and "/" in v:
|
||||
zh_list = v.split("/")
|
||||
title[k] = zh_list[0].strip(" ")
|
||||
return title
|
||||
|
||||
|
||||
def del_rules(raw_name, rule_list):
    """Return *raw_name* with every occurrence of each rule string removed.

    The rules are applied one after another in the order given, so a later
    rule sees the text that is left after the earlier removals.

    Args:
        raw_name: The string to clean.
        rule_list: Iterable of substrings to delete from ``raw_name``.

    Returns:
        The cleaned string.
    """
    cleaned = raw_name
    for rule in rule_list:
        cleaned = cleaned.replace(rule, "")
    return cleaned
|
||||
|
||||
|
||||
def get_str_location(char, target):
    """Return the indices at which *target* occurs in *char*.

    Despite its name, ``char`` is the sequence that is scanned (normally a
    string) and ``target`` is the single item looked for. The scan compares
    one element at a time, so when ``char`` is a string only a
    one-character ``target`` can ever match.

    Args:
        char: The string (or other iterable) to scan.
        target: The element to locate.

    Returns:
        A list of zero-based indices in ascending order; empty when there
        is no match.
    """
    return [index for index, value in enumerate(char) if target == value]
|
||||
|
||||
|
||||
def get_gp(char, string):
    """Return the content of the square brackets that most closely enclose *char*.

    The first occurrence of ``char`` in ``string`` is located; the result is
    the text between the last ``[`` that starts before that occurrence and
    the first ``]`` that comes after the start of that occurrence.

    Args:
        char: The substring to look for.
        string: The text to search, e.g. ``"[Group][Title][1080p]"``.

    Returns:
        The text between the enclosing ``[`` and ``]`` (brackets excluded).

    Raises:
        IndexError: If ``char`` does not occur in ``string``, or there is no
            ``[`` before it, or no ``]`` after it.
    """
    position = string.find(char)
    if position < 0:
        raise IndexError("substring not found in string")
    # Last "[" strictly before the match start.
    opening = string.rfind("[", 0, position)
    if opening < 0:
        raise IndexError("no '[' before the substring")
    # First "]" strictly after the match start.
    closing = string.find("]", position + 1)
    if closing < 0:
        raise IndexError("no ']' after the substring")
    return string[opening + 1:closing]
|
||||
|
||||
|
||||
def has_en(str):
    """Return True if *str* contains at least one lowercase ASCII letter.

    Only ``a``-``z`` are checked; uppercase letters do not count.

    Args:
        str: The text to inspect. The parameter name shadows the builtin
            but is kept so existing keyword callers keep working.

    Returns:
        ``True`` when a lowercase ASCII letter is present, else ``False``.
    """
    # search() stops at the first hit instead of collecting every match.
    return re.search(r'[a-z]', str) is not None
|
||||
|
||||
|
||||
def has_zh(str):
    """Return True if *str* contains a character in U+4E00..U+9FA5.

    That code-point range holds CJK unified ideographs, which is what this
    file treats as "Chinese" text.

    Args:
        str: The text to inspect. The parameter name shadows the builtin
            but is kept so existing keyword callers keep working.

    Returns:
        ``True`` when such a character is present, else ``False``.
    """
    # search() stops at the first hit instead of collecting every match.
    return re.search(r'[\u4e00-\u9fa5]', str) is not None
|
||||
|
||||
|
||||
def has_jp(str):
    """Return True if *str* contains a character in U+3040..U+31FF.

    That code-point range includes the hiragana and katakana blocks, so
    kana are detected; kanji are not part of the range.

    Args:
        str: The text to inspect. The parameter name shadows the builtin
            but is kept so existing keyword callers keep working.

    Returns:
        ``True`` when such a character is present, else ``False``.
    """
    # search() stops at the first hit instead of collecting every match.
    return re.search(r'[\u3040-\u31ff]', str) is not None
|
||||
|
||||
|
||||
def re_verity(raw_list, raw_name):
    """Keep the candidates from *raw_list* that really occur in *raw_name*.

    The comparison lowercases ``raw_name`` but not the candidates, so a
    candidate containing uppercase letters never matches.

    Args:
        raw_list: Iterable of candidate substrings.
        raw_name: Either one string, or a list whose first two items are
            strings. For a list, a candidate is kept only when it occurs in
            both of those items.

    Returns:
        A list of the matching candidates, in their original order.

    Raises:
        IndexError: If ``raw_name`` is a list with fewer than two items and
            ``raw_list`` is not empty.
    """
    candidates = list(raw_list)
    if not candidates:
        # Nothing to verify; raw_name is deliberately left untouched.
        return []
    # The type check and the lowercasing do not depend on the candidate,
    # so they are done once up front rather than per iteration.
    if isinstance(raw_name, list):
        first = raw_name[0].lower()
        second = raw_name[1].lower()
        return [c_res for c_res in candidates if c_res in first and c_res in second]
    lowered = raw_name.lower()
    return [c_res for c_res in candidates if c_res in lowered]
|
||||
File diff suppressed because it is too large
Load Diff
@@ -7,7 +7,6 @@ from conf import settings
|
||||
from argument_parser import parse
|
||||
from log import setup_logger
|
||||
from utils import json_config
|
||||
from env_bool import init_switch
|
||||
|
||||
from core.rss_collector import RSSCollector
|
||||
from core.download_client import DownloadClient
|
||||
|
||||
@@ -10,7 +10,6 @@ DEFAULT_SETTINGS = {
|
||||
"method": "pn",
|
||||
"enable_group_tag": True,
|
||||
"info_path": "/config/bangumi.json",
|
||||
"rule_path": "/config/rule_beta.json",
|
||||
"not_contain": "720",
|
||||
"get_rule_debug": False,
|
||||
"rule_url": "https://raw.githubusercontent.com/EstrellaXD/Bangumi_Auto_Collector/main/AutoBangumi/config/rule.json",
|
||||
|
||||
@@ -7,7 +7,6 @@ from downloader import getClient
|
||||
from downloader.exceptions import ConflictError
|
||||
|
||||
from conf import settings
|
||||
from utils import json_config
|
||||
|
||||
from core.eps_complete import FullSeasonGet
|
||||
|
||||
|
||||
@@ -2,7 +2,6 @@ import os.path
|
||||
import re
|
||||
|
||||
import requests
|
||||
from qbittorrentapi import Client
|
||||
from bs4 import BeautifulSoup
|
||||
import logging
|
||||
|
||||
@@ -58,8 +57,3 @@ class FullSeasonGet:
|
||||
return downloads
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
a = FullSeasonGet("Lilith-Raws", "Shijou Saikyou no Daimaou", "S01")
|
||||
a.run()
|
||||
for torrent in a.torrents:
|
||||
logger.debug(torrent["url"])
|
||||
|
||||
@@ -8,7 +8,6 @@ from core.download_client import DownloadClient
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
||||
class Renamer:
|
||||
def __init__(self, downloadClient: DownloadClient):
|
||||
self.client = downloadClient
|
||||
@@ -30,13 +29,13 @@ class Renamer:
|
||||
return new_name
|
||||
|
||||
def rename_pn(self, name):
|
||||
n = re.split(r"\[|\]", name)
|
||||
n = re.split(r"[\[\]]", name)
|
||||
file_name = name.replace(f"[{n[1]}]", "")
|
||||
for rule in self.rules:
|
||||
matchObj = re.match(rule, file_name, re.I)
|
||||
if matchObj is not None:
|
||||
new_name = re.sub(
|
||||
r"\[|\]",
|
||||
r"[\[\]]",
|
||||
"",
|
||||
f"{matchObj.group(1).strip()} E{matchObj.group(2)}{n[-1]}",
|
||||
)
|
||||
@@ -71,7 +70,3 @@ class Renamer:
|
||||
self.client.delete_torrent(info.hash)
|
||||
self.print_result(torrent_count, rename_count)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
rename = Renamer()
|
||||
rename.rename_pn("[Lilith-Raws] Shokei Shoujo no Virgin Road - 02 [Baha][WEB-DL][1080p][AVC AAC][CHT][MP4]")
|
||||
@@ -1,5 +1,4 @@
|
||||
# -*- coding: UTF-8 -*-
|
||||
import os
|
||||
import logging
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
@@ -3,7 +3,6 @@ import time
|
||||
|
||||
from qbittorrentapi import Client, LoginFailed
|
||||
from qbittorrentapi.exceptions import Conflict409Error
|
||||
from tomlkit import item
|
||||
|
||||
from conf import settings
|
||||
|
||||
|
||||
@@ -1,22 +0,0 @@
|
||||
from conf import settings
|
||||
|
||||
# Boolean-like settings, read from `settings` once when this module is imported.
# The list stores the attribute values themselves, not references back to `settings`,
# so rebinding an element of this list does not change the corresponding setting.
bool_group = [
|
||||
settings.enable_group_tag,
|
||||
settings.get_rule_debug,
|
||||
settings.debug_mode,
|
||||
settings.enable_eps_complete,
|
||||
settings.season_one_tag
|
||||
]
|
||||
|
||||
|
||||
def init_switch():
    """Coerce string-valued settings to their proper Python types, in place.

    ``settings.sleep_time`` is converted to ``float`` when it is a string.
    Each boolean switch that is a string is replaced on ``settings`` by
    ``True`` when its lowercased text is one of the accepted truthy
    spellings, and by ``False`` otherwise. Values that are not strings are
    left unchanged.

    Raises:
        ValueError: If ``settings.sleep_time`` is a string that ``float()``
            cannot parse.
    """
    # isinstance() is required here: `value is str` compares the value with
    # the type object itself and is never true for an actual string.
    if isinstance(settings.sleep_time, str):
        settings.sleep_time = float(settings.sleep_time)
    # Work with attribute names so the converted value can be written back;
    # rebinding a loop variable would leave `settings` untouched.
    switch_names = (
        "enable_group_tag",
        "get_rule_debug",
        "debug_mode",
        "enable_eps_complete",
        "season_one_tag",
    )
    for name in switch_names:
        switch = getattr(settings, name)
        if isinstance(switch, str):
            # "i" is carried over from the original tuple; "1" is added
            # because "i" looks like a typo for it.
            setattr(settings, name, switch.lower() in ("true", "t", "1", "i"))
|
||||
|
||||
|
||||
# Manual check when the module is run directly: initialise the settings and
# print the runtime type of settings.debug_mode.
if __name__ == "__main__":
|
||||
settings.init()
|
||||
print(type(settings.debug_mode))
|
||||
@@ -1,59 +0,0 @@
|
||||
[
|
||||
{
|
||||
"group_name": [
|
||||
"Lilith-Raws",
|
||||
"NC-Raws",
|
||||
"Skymoon-Raws",
|
||||
"天月搬运组",
|
||||
"LoliHouse",
|
||||
"猎户不鸽发布组",
|
||||
"NaN-Raws",
|
||||
"猎户随缘发布组",
|
||||
"桜都字幕组",
|
||||
"澄空学园&雪飘工作室",
|
||||
"千夏字幕组",
|
||||
"IET字幕组",
|
||||
"离谱Sub",
|
||||
"酷漫404",
|
||||
"星空字幕组",
|
||||
"轻之国度字幕组",
|
||||
"枫叶字幕组",
|
||||
"雪飘工作室",
|
||||
"豌豆字幕组",
|
||||
"云光字幕组",
|
||||
"悠哈璃羽字幕社",
|
||||
"桜都字幕组",
|
||||
"ANi",
|
||||
"❀拨雪寻春❀",
|
||||
"极彩字幕组",
|
||||
"悠哈璃羽字幕社",
|
||||
"爱恋&漫猫字幕组",
|
||||
"MingY",
|
||||
"VCB-Studio",
|
||||
"虹咲学园烤肉同好会"
|
||||
],
|
||||
"name_position": 1
|
||||
},
|
||||
{
|
||||
"group_name": [
|
||||
"喵萌奶茶屋",
|
||||
"爱恋字母社",
|
||||
"诸神字幕组",
|
||||
"驯兽师联盟",
|
||||
"夏沐字幕组",
|
||||
"幻樱",
|
||||
"动漫国字幕组",
|
||||
"SweetSub&圆环记录攻略组",
|
||||
"动漫萌",
|
||||
"极影字幕社",
|
||||
"喵萌Production"
|
||||
],
|
||||
"name_position": 2
|
||||
},
|
||||
{
|
||||
"group_name": [
|
||||
"爱恋字母社"
|
||||
],
|
||||
"name_position": 3
|
||||
}
|
||||
]
|
||||
@@ -1,67 +0,0 @@
|
||||
[
|
||||
{
|
||||
"group_name": [
|
||||
"Lilith-Raws",
|
||||
"NC-Raws",
|
||||
"Skymoon-Raws",
|
||||
"天月搬运组",
|
||||
"LoliHouse",
|
||||
"猎户不鸽发布组",
|
||||
"NaN-Raws",
|
||||
"猎户随缘发布组",
|
||||
"桜都字幕组",
|
||||
"澄空学园&雪飘工作室",
|
||||
"千夏字幕组",
|
||||
"IET字幕组",
|
||||
"离谱Sub",
|
||||
"酷漫404",
|
||||
"星空字幕组",
|
||||
"轻之国度字幕组",
|
||||
"枫叶字幕组",
|
||||
"雪飘工作室",
|
||||
"豌豆字幕组",
|
||||
"云光字幕组",
|
||||
"悠哈璃羽字幕社",
|
||||
"桜都字幕组",
|
||||
"ANi",
|
||||
"❀拨雪寻春❀",
|
||||
"极彩字幕组",
|
||||
"悠哈璃羽字幕社",
|
||||
"爱恋&漫猫字幕组",
|
||||
"MingY",
|
||||
"VCB-Studio",
|
||||
"虹咲学园烤肉同好会",
|
||||
"喵萌奶茶屋",
|
||||
"爱恋字母社",
|
||||
"诸神字幕组",
|
||||
"驯兽师联盟",
|
||||
"夏沐字幕组",
|
||||
"幻樱",
|
||||
"动漫国字幕组",
|
||||
"SweetSub&圆环记录攻略组",
|
||||
"动漫萌",
|
||||
"极影字幕社",
|
||||
"喵萌Production",
|
||||
"Lolihouse",
|
||||
"PCSUB",
|
||||
"桜都字幕組",
|
||||
"LowPower-Raws",
|
||||
"雪飄工作室"
|
||||
],
|
||||
"type": 1
|
||||
},
|
||||
{
|
||||
"group_name": [
|
||||
"爱恋字母社",
|
||||
"風車字幕組",
|
||||
"幻櫻字幕組",
|
||||
"幻櫻字幕組",
|
||||
"Dymy字幕組",
|
||||
"動漫國字幕組",
|
||||
"c.c動漫",
|
||||
"动漫国字幕组",
|
||||
"c.c动漫"
|
||||
],
|
||||
"type": 2
|
||||
}
|
||||
]
|
||||
@@ -3,5 +3,4 @@ bs4
|
||||
requests
|
||||
lxml
|
||||
zhconv
|
||||
tomlkit
|
||||
|
||||
|
||||
Reference in New Issue
Block a user