mirror of
https://github.com/EstrellaXD/Auto_Bangumi.git
synced 2026-03-31 01:01:31 +08:00
93 lines
3.5 KiB
Python
93 lines
3.5 KiB
Python
import re
|
||
import sys
|
||
import time
|
||
|
||
import requests
|
||
from bs4 import BeautifulSoup
|
||
import json
|
||
|
||
|
||
class CollectRSS:
    """Collect bangumi titles from the Mikan RSS feed and merge them into ``bangumi.json``.

    Usage: construct with the already-known entries, call
    :meth:`get_info_list` to extract titles from the downloaded feed items,
    then :meth:`put_info_json` to append the new ones and write the file.

    Attributes:
        bangumi_list: titles extracted by :meth:`get_info_list`, de-duplicated,
            in feed order.
        rules: content of ``rule.json``; each rule is read for its
            ``"group_name"`` (list of regex patterns) and ``"name_position"``
            (index into the split title) keys.
        items: ``<item>`` elements parsed from the RSS feed.
        info: list of ``{"title": ..., "season": ...}`` dicts handed in by the
            caller; extended in place by :meth:`put_info_json`.
    """

    RSS_URL = "https://mikanani.me/RSS/Classic"
    # Seconds to wait for the feed; without a timeout requests can block forever.
    REQUEST_TIMEOUT = 30

    # Delimiters that separate the bracketed fields of a release name.
    # NOTE(review): the `\(|\)` alternative appears twice; the second pair may
    # have been meant to be full-width parentheses - confirm before changing.
    _SPLIT_RULE = re.compile(r"\[|\]|\【|\】|\★|\(|\)|\(|\)")
    # Everything before the first " -" that the greedy match can reach.
    _LAST_RULE = re.compile(r"(.*)( \-)", re.I)
    # Characters outside Latin-1, a space followed by one or two digits
    # (presumably an episode number - TODO confirm), and the middle dot.
    _NOISE_RULE = re.compile(r"[^\x00-\xff]{1,}| \d{1,2}|\·")
    # Separators between alternative names inside one title field.
    _ALT_NAME_RULE = re.compile(r"\/|\_")
    # A leading "S<digits>" marker; group 2 is whatever follows it.
    _SEASON_PREFIX_RULE = re.compile(r"(S\d{1,2}(.*))", re.I)
    # Splits "<title><season marker>"; group 1 is the title, group 2 the season.
    _SEASON_RULE = re.compile(r"(.*)(Season \d{1,2}|S\d{1,2}|第.*季)", re.I)

    def __init__(self, info):
        """Load the matching rules and download the RSS feed.

        Args:
            info: list of already-known entries (dicts with a ``"title"`` key).

        Raises:
            OSError: ``rule.json`` cannot be opened.
            json.JSONDecodeError: ``rule.json`` is not valid JSON.
            requests.RequestException: the feed cannot be fetched in time.
        """
        self.bangumi_list = []
        # Explicit encoding: the platform default is not UTF-8 everywhere.
        with open("rule.json", encoding="utf-8") as f:
            self.rules = json.load(f)
        # The second positional argument of requests.get() is `params` (the
        # query string), not an encoding, so the encoding is set on the
        # response instead.
        rss = requests.get(self.RSS_URL, timeout=self.REQUEST_TIMEOUT)
        rss.encoding = "utf-8"
        soup = BeautifulSoup(rss.text, 'xml')
        self.items = soup.find_all('item')
        self.info = info

    def get_info_list(self):
        """Extract a title from every feed item and collect the unique ones.

        Items are matched against ``self.rules`` in order; the first rule with
        a matching group pattern decides how the title is extracted. Items
        that match no rule, or whose title cannot be extracted, are reported
        on stdout and skipped. Results are appended to ``self.bangumi_list``.
        """
        for item in self.items:
            title_tag = item.title
            name = title_tag.string if title_tag is not None else None
            if not name:
                # No usable title text; nothing to match against.
                continue
            rule = self._find_rule(name)
            if rule is None:
                print(f"ERROR Not match with {name}")
                continue
            bangumi_title = self._extract_title(name, rule)
            if bangumi_title is None:
                # Previously dropped without any trace.
                print(f"ERROR Can not extract title from {name}")
                continue
            if bangumi_title not in self.bangumi_list:
                self.bangumi_list.append(bangumi_title)

    def _find_rule(self, name):
        """Return the first rule with a group pattern found in *name*, else ``None``."""
        for rule in self.rules:
            if any(re.search(group, name) for group in rule["group_name"]):
                return rule
        return None

    @classmethod
    def _extract_title(cls, name, rule):
        """Return the cleaned title picked from *name* by *rule*, or ``None`` if there is none."""
        fields = [part for part in cls._SPLIT_RULE.split(name) if part not in ('', ' ')]
        try:
            title = fields[rule['name_position']].strip()
        except IndexError:
            return None
        # Among the "/" or "_" separated alternatives left after removing the
        # noise, keep the longest one; fall back to the raw field otherwise.
        alternatives = [alt for alt in cls._ALT_NAME_RULE.split(cls._NOISE_RULE.sub("", title)) if alt != '']
        pre_name = max(alternatives, key=len, default='').strip()
        if pre_name != '':
            title = pre_name
        # Applied twice so that up to two trailing " -..." tails are removed.
        for _ in range(2):
            match_obj = cls._LAST_RULE.match(title)
            if match_obj is not None:
                title = match_obj.group(1).strip()
        match_obj = cls._SEASON_PREFIX_RULE.match(title)
        if match_obj is not None:
            title = match_obj.group(2).strip()
        # An empty title carries no information and must not be stored.
        return title or None

    def put_info_json(self):
        """Append titles not yet in ``self.info`` and write it to ``bangumi.json``.

        Each collected title is split into a title and an optional season
        marker. An entry is added only when its title is not known yet, both
        from earlier runs and from earlier titles in this run. Every addition
        is logged to stdout with a timestamp. The file is always rewritten.
        """
        known_titles = {data["title"] for data in self.info}
        for title in self.bangumi_list:
            match_title_season = self._SEASON_RULE.match(title)
            if match_title_season is not None:
                json_title = match_title_season.group(1).strip()
                json_season = match_title_season.group(2)
            else:
                json_title = title
                json_season = ''
            if json_title in known_titles:
                continue
            # Remember the title so a later variant of it in the same run is
            # rejected exactly as it would be on the next run.
            known_titles.add(json_title)
            self.info.append({
                "title": json_title,
                "season": json_season
            })
            sys.stdout.write(f"[{time.strftime('%Y-%m-%d %X')}] add {json_title} {json_season}" + "\n")
            sys.stdout.flush()
        with open("bangumi.json", 'w', encoding='utf8') as f:
            json.dump(self.info, f, indent=4, separators=(',', ': '), ensure_ascii=False)
|
||
|
||
|
||
if __name__ == "__main__":
    # Script entry point: load the known entries, collect new titles from the
    # RSS feed and write the merged list back to bangumi.json.
    try:
        # put_info_json() writes this file as UTF-8 with ensure_ascii=False,
        # so it has to be read back as UTF-8 rather than with the platform
        # default encoding.
        with open("bangumi.json", encoding="utf-8") as f:
            info = json.load(f)
    except FileNotFoundError:
        # First run: no file yet, start from an empty list.
        info = []
    cr = CollectRSS(info)
    cr.get_info_list()
    cr.put_info_json()