This commit is contained in:
EstrellaXD
2022-05-27 23:53:21 +08:00
parent 4543104ffb
commit 5a2eb7ed4d
6 changed files with 41 additions and 39 deletions

View File

@@ -2,7 +2,7 @@
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="jdk" jdkName="Python 3.10" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

2
.idea/misc.xml generated
View File

@@ -1,4 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8 (2)" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10" project-jdk-type="Python SDK" />
</project>

View File

@@ -1,28 +1,10 @@
import re
import csv
import json
import time
import zhconv
import requests
import logging
import pandas as pd
def read_data(name, rows):
    """Load up to ``rows`` raw release titles from a local CSV dump.

    Args:
        name: Which dump to read, ``"mikan"`` or ``"dmhy"``.
        rows: Maximum number of titles to return.

    Returns:
        A list of title strings, or ``None`` when ``name`` is not a known
        source (the original code fell through without returning).

    Raises:
        OSError: If the CSV file for the requested source cannot be opened.
        IndexError: If a row has fewer columns than the title column index.
    """
    # source name -> (csv path, title column index, leading rows to skip).
    # The dmhy dump skips its first row -- presumably a header; verify
    # against the actual file.
    sources = {
        "mikan": ('mikan.csv', 3, 0),
        "dmhy": ('dmhy.csv', 4, 1),
    }
    if name not in sources:
        return None
    path, column, skip = sources[name]
    # newline='' lets the csv module do its own newline handling, as the
    # csv documentation recommends for file objects.
    with open(path, 'r', encoding='utf-8', newline='') as csv_file:
        reader = csv.reader(csv_file)
        return [row[column] for row in reader][skip:rows + skip]
class Rename:
class RSSInfoCleaner:
class Name:
raw_name = None
group = None
@@ -451,14 +433,12 @@ class Rename:
try:
res = re.search("(^[a\u4e00-\u9fa5: ]{1,10} ?)([a-z:]{1,20} ?){1,10}", clean_name).group(1)
clean_name = clean_name.replace(res, res.strip(" ") + "/")
print("zh_pre:%s" % clean_name)
except Exception as e:
logging.info(e)
else:
try:
res = re.search("^(([a-z:]{1,20} ?){1,10} )[\u4e00-\u9fa5: a]{1,20}", clean_name).group(1)
clean_name = clean_name.replace(res, res.strip(" ") + "/")
print("en_pre:%s" % clean_name)
except Exception as e:
logging.info(e)
except Exception as e:
@@ -502,6 +482,7 @@ class Rename:
# Lowercase all letters
clean_name = self.Name.file_name.lower()
# clean_name = self.Name.file_name
# Strip out the information that has already been extracted
for k, v in info.items():
if v is not None:
@@ -525,7 +506,6 @@ class Rename:
clean_name = re.sub('[^a-zA-Z\u4e00-\u9fa5:@#$%^&*()\[\]/ ]', "", clean_name)
clean_name = re.sub(' +', ' ', clean_name).strip(" ")
clean_name = re.sub("([(\[] *| *[)\]])", "", clean_name)
print(clean_name)
zh_list = []
en_list = []
@@ -540,12 +520,3 @@ class Rename:
return info
if __name__ == "__main__":
    # Ad-hoc benchmark: parse a batch of raw titles and report elapsed time.
    # Titles come from the mikan/dmhy CSV dumps; dmhy holds at most 10k rows
    # and mikan at most 30k rows.
    name_list = read_data("dmhy", 1000)
    start = time.time()
    for name in name_list:
        # Print the raw title, then the Chinese name parsed from it.
        print(name)
        print(Rename(name).Name.zh)
        print()
    # Total wall-clock seconds for the whole batch.
    print("%s" % (time.time() - start))

View File

@@ -4,8 +4,8 @@ import requests
from bs4 import BeautifulSoup
import json
import re
from env import EnvInfo
from env import EnvInfo, BColors
from AutoBangumi.app.RssFliter.RSSFliter import RSSInfoCleaner as Cleaner
class MatchRule:
split_rule = r"\[|\]|\【|\】|\★|\|\|\(|\)"
@@ -118,6 +118,25 @@ class CollectRSS:
if __name__ == "__main__":
    # Regular collection pass.
    cr = CollectRSS()
    cr.get_info_list()
    cr.put_info_json()

    # Debug preview: fetch the feed and print each raw title followed by the
    # name(s) the cleaner extracted from it.
    # The second positional argument of requests.get() is `params` (query
    # string), so the original 'utf-8' was appended to the URL instead of
    # acting as an encoding.
    # NOTE(review): presumably the intent was to decode the response as
    # UTF-8 -- confirm against the feed.
    rss = requests.get(EnvInfo.rss_link)
    rss.encoding = 'utf-8'
    soup = BeautifulSoup(rss.text, 'xml')
    for item in soup.find_all('item'):
        name = item.title.string
        print(BColors.HEADER + name + BColors.OKGREEN)
        pn = Cleaner(name).Name
        # Prefer the English name(s); fall back to the Chinese one(s).
        parsed = pn.en if pn.en is not None else pn.zh
        if isinstance(parsed, list):
            for n in parsed:
                print(n)
        else:
            print(parsed)

View File

@@ -3,7 +3,7 @@ import time
class EnvInfo:
debug_mode = False
debug_mode = True
# Docker Env
if not debug_mode:
host_ip = os.environ["HOST"]
@@ -36,3 +36,15 @@ class EnvInfo:
rule_url = "https://raw.githubusercontent.com/EstrellaXD/Bangumi_Auto_Collector/main/AutoBangumi/config/rule.json"
time_show_obj = time.strftime('%Y-%m-%d %X')
rule_name_re = r"\:|\/|\."
class BColors:
    """ANSI SGR escape sequences for colouring terminal output.

    Concatenate a constant in front of the text to style it, and emit
    ``ENDC`` afterwards to restore the terminal's default rendering.
    """

    HEADER = '\033[95m'     # bright magenta
    OKBLUE = '\033[94m'     # bright blue
    OKCYAN = '\033[96m'     # bright cyan
    OKGREEN = '\033[92m'    # bright green
    WARNING = '\033[93m'    # bright yellow
    FAIL = '\033[91m'       # bright red
    ENDC = '\033[0m'        # reset all attributes
    BOLD = '\033[1m'        # bold
    UNDERLINE = '\033[4m'   # underline