mirror of
https://github.com/EstrellaXD/Auto_Bangumi.git
synced 2026-04-14 10:30:35 +08:00
feat: integrate OpenAIParser to TitleParser
This commit is contained in:
@@ -9,7 +9,8 @@ logger = logging.getLogger(__name__)
|
||||
DEFAULT_PROMPT = """\
|
||||
You will now play the role of a super assistant.
|
||||
Your task is to extract structured data from unstructured text content and output it in JSON format.
|
||||
If you are unable to extract any information, please leave the field empty. Do not fabricate data!
|
||||
If you are unable to extract any information, please keep all fields and leave the field empty or default value like `''`, `None`.
|
||||
But Do not fabricate data!
|
||||
|
||||
the python structured data type is:
|
||||
|
||||
@@ -32,13 +33,13 @@ Example:
|
||||
|
||||
```
|
||||
input: "【喵萌奶茶屋】★04月新番★[夏日重现/Summer Time Rendering][11][1080p][繁日双语][招募翻译]"
|
||||
output: '{"group": "喵萌奶茶屋", "title_en": "Summer Time Rendering", "resolution": "1080p", "episode": 11, "season": 1}'
|
||||
output: '{"group": "喵萌奶茶屋", "title_en": "Summer Time Rendering", "resolution": "1080p", "episode": 11, "season": 1, "title_zh": "夏日重现", "sub": "", "title_jp": "", "season_raw": "", "source": ""}'
|
||||
|
||||
input: "【幻樱字幕组】【4月新番】【古见同学有交流障碍症 第二季 Komi-san wa, Komyushou Desu. S02】【22】【GB_MP4】【1920X1080】"
|
||||
output: '{"group": "幻樱字幕组", "title_en": "Komi-san wa, Komyushou Desu.", "resolution": "1920X1080", "episode": 22, "season": 2}'
|
||||
output: '{"group": "幻樱字幕组", "title_en": "Komi-san wa, Komyushou Desu.", "resolution": "1920X1080", "episode": 22, "season": 2, "title_zh": "古见同学有交流障碍症", "sub": "", "title_jp": "", "season_raw": "", "source": ""}'
|
||||
|
||||
input: "[Lilith-Raws] 关于我在无意间被隔壁的天使变成废柴这件事 / Otonari no Tenshi-sama - 09 [Baha][WEB-DL][1080p][AVC AAC][CHT][MP4]"
|
||||
output: '{"group": "Lilith-Raws", "title_en": "Otonari no Tenshi-sama", "resolution": "1080p", "episode": 9, "season": 1}'
|
||||
output: '{"group": "Lilith-Raws", "title_en": "Otonari no Tenshi-sama", "resolution": "1080p", "episode": 9, "season": 1, "source": "WEB-DL", "title_zh": "关于我在无意间被隔壁的天使变成废柴这件事", "sub": "CHT", "title_jp": ""}'
|
||||
```
|
||||
"""
|
||||
|
||||
|
||||
@@ -2,8 +2,10 @@ import logging
|
||||
|
||||
from module.conf import settings
|
||||
from module.models import Bangumi
|
||||
from module.models.bangumi import Episode
|
||||
from module.parser.openai import OpenAIParser
|
||||
|
||||
from .analyser import raw_parser, tmdb_parser, torrent_parser, mikan_parser
|
||||
from .analyser import mikan_parser, raw_parser, tmdb_parser, torrent_parser
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -43,14 +45,28 @@ class TitleParser:
|
||||
logger.debug(f"TMDB Matched, official title is {tmdb_info.title}")
|
||||
bangumi.poster_link = tmdb_info.poster_link
|
||||
else:
|
||||
logger.warning(f"Cannot match {bangumi.official_title} in TMDB. Use raw title instead.")
|
||||
logger.warning(
|
||||
f"Cannot match {bangumi.official_title} in TMDB. Use raw title instead."
|
||||
)
|
||||
logger.warning("Please change bangumi info manually.")
|
||||
|
||||
@staticmethod
|
||||
def raw_parser(raw: str) -> Bangumi | None:
|
||||
language = settings.rss_parser.language
|
||||
try:
|
||||
episode = raw_parser(raw)
|
||||
# use OpenAI ChatGPT to parse raw title and get structured data
|
||||
if settings.experimental.openai_enable:
|
||||
gpt = OpenAIParser(
|
||||
api_key=settings.experimental.openai_api_key,
|
||||
api_base=settings.experimental.openai_api_base,
|
||||
model=settings.experimental.openai_model,
|
||||
)
|
||||
episode_dict = gpt.parse(raw, asdict=True)
|
||||
print(f"Episode dict: {episode_dict}")
|
||||
episode = Episode(**episode_dict)
|
||||
else:
|
||||
episode = raw_parser(raw)
|
||||
|
||||
titles = {
|
||||
"zh": episode.title_zh,
|
||||
"en": episode.title_en,
|
||||
|
||||
30
backend/src/test/test_title_parser.py
Normal file
30
backend/src/test/test_title_parser.py
Normal file
@@ -0,0 +1,30 @@
|
||||
import json
|
||||
import os
|
||||
|
||||
import pytest
|
||||
from module.conf import settings
|
||||
from module.parser.title_parser import TitleParser
|
||||
|
||||
|
||||
class TestTitleParser:
    """Tests for ``TitleParser.raw_parser`` using one sample release title.

    Both tests feed the same raw title to the parser and check the same
    fields on the result; they differ only in whether the OpenAI-backed
    branch of ``raw_parser`` is taken.
    """

    # Raw release title shared by both tests.
    RAW_TITLE = "[梦蓝字幕组]New Doraemon 哆啦A梦新番[747][2023.02.25][AVC][1080P][GB_JP][MP4]"

    @staticmethod
    def _assert_parsed(result):
        """Assert that *result* carries the fields expected for ``RAW_TITLE``."""
        assert result.group_name == "梦蓝字幕组"
        assert result.title_raw == "New Doraemon"
        assert result.dpi == "1080P"
        assert result.season == 1
        assert result.subtitle == "GB_JP"

    def test_parse_without_openai(self, monkeypatch):
        """Parse the sample title with the OpenAI branch switched off."""
        # raw_parser chooses its backend from settings.experimental.openai_enable.
        # Pin it to False so this test really covers the non-OpenAI path even
        # when OpenAI is enabled in the local configuration; monkeypatch
        # restores the original value after the test.
        monkeypatch.setattr(settings.experimental, "openai_enable", False)
        self._assert_parsed(TitleParser.raw_parser(self.RAW_TITLE))

    @pytest.mark.skipif(
        not settings.experimental.openai_enable,
        reason="OpenAI is not enabled in settings",
    )
    def test_parse_with_openai(self):
        """Parse the sample title through the OpenAI branch (skipped unless enabled)."""
        self._assert_parsed(TitleParser.raw_parser(self.RAW_TITLE))
|
||||
Reference in New Issue
Block a user