mirror of
https://github.com/EstrellaXD/Auto_Bangumi.git
synced 2026-04-14 18:41:04 +08:00
129 lines
4.2 KiB
Python
129 lines
4.2 KiB
Python
import asyncio
|
|
import json
|
|
import logging
|
|
|
|
import openai
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
DEFAULT_PROMPT = """\
|
|
You will now play the role of a super assistant.
|
|
Your task is to extract structured data from unstructured text content and output it in JSON format.
|
|
If you are unable to extract any information, please leave the field empty. Do not fabricate data!
|
|
|
|
the python structured data type is:
|
|
|
|
```python
|
|
@dataclass
|
|
class Episode:
|
|
title_en: Optional[str]
|
|
title_zh: Optional[str]
|
|
title_jp: Optional[str]
|
|
season: int
|
|
season_raw: str
|
|
episode: int
|
|
sub: str
|
|
group: str
|
|
resolution: str
|
|
source: str
|
|
```
|
|
|
|
Example:
|
|
|
|
```
|
|
input: "【喵萌奶茶屋】★04月新番★[夏日重现/Summer Time Rendering][11][1080p][繁日双语][招募翻译]"
|
|
output: '{"group": "喵萌奶茶屋", "title_en": "Summer Time Rendering", "resolution": "1080p", "episode": 11, "season": 1}'
|
|
|
|
input: "【幻樱字幕组】【4月新番】【古见同学有交流障碍症 第二季 Komi-san wa, Komyushou Desu. S02】【22】【GB_MP4】【1920X1080】"
|
|
output: '{"group": "幻樱字幕组", "title_en": "Komi-san wa, Komyushou Desu.", "resolution": "1920X1080", "episode": 22, "season": 2}'
|
|
|
|
input: "[Lilith-Raws] 关于我在无意间被隔壁的天使变成废柴这件事 / Otonari no Tenshi-sama - 09 [Baha][WEB-DL][1080p][AVC AAC][CHT][MP4]"
|
|
output: '{"group": "Lilith-Raws", "title_en": "Otonari no Tenshi-sama", "resolution": "1080p", "episode": 9, "season": 1}'
|
|
```
|
|
"""
|
|
|
|
|
|
class OpenAIParser:
|
|
def __init__(
|
|
self,
|
|
api_key: str,
|
|
api_base: str = "https://api.openai.com/v1",
|
|
model: str = "gpt-3.5-turbo",
|
|
**kwargs,
|
|
) -> None:
|
|
"""OpenAIParser is a class to parse text with openai
|
|
|
|
Args:
|
|
api_key (str): the OpenAI api key
|
|
api_base (str):
|
|
the OpenAI api base url, you can use custom url here. \
|
|
Defaults to "https://api.openai.com/v1".
|
|
model (str):
|
|
the ChatGPT model parameter, you can get more details from \
|
|
https://platform.openai.com/docs/api-reference/chat/create. \
|
|
Defaults to "gpt-3.5-turbo".
|
|
kwargs (dict):
|
|
the OpenAI ChatGPT parameters, you can get more details from \
|
|
https://platform.openai.com/docs/api-reference/chat/create.
|
|
|
|
Raises:
|
|
ValueError: if api_key is not provided.
|
|
"""
|
|
if not api_key:
|
|
raise ValueError("API key is required.")
|
|
|
|
self._api_key = api_key
|
|
self.api_base = api_base
|
|
self.model = model
|
|
self.openai_kwargs = kwargs
|
|
|
|
def parse(
|
|
self, text: str, prompt: str | None = None, asdict: bool = True
|
|
) -> dict | str:
|
|
"""parse text with openai
|
|
|
|
Args:
|
|
text (str): the text to be parsed
|
|
prompt (str | None, optional):
|
|
the custom prompt. Built-in prompt will be used if no prompt is provided. \
|
|
Defaults to None.
|
|
asdict (bool, optional):
|
|
whether to return the result as dict or not. \
|
|
Defaults to True.
|
|
|
|
Returns:
|
|
dict | str: the parsed result.
|
|
"""
|
|
if not prompt:
|
|
prompt = DEFAULT_PROMPT
|
|
|
|
async def complete() -> str:
|
|
resp = await openai.ChatCompletion.acreate(
|
|
api_key=self._api_key,
|
|
api_base=self.api_base,
|
|
model=self.model,
|
|
messages=[
|
|
dict(role="system", content=prompt),
|
|
dict(role="user", content=text),
|
|
],
|
|
# set temperature to 0 to make results be more stable and reproducible.
|
|
temperature=0,
|
|
**self.openai_kwargs,
|
|
)
|
|
|
|
result = resp["choices"][0]["message"]["content"]
|
|
return result
|
|
|
|
loop = asyncio.get_event_loop()
|
|
result = loop.run_until_complete(complete())
|
|
|
|
if asdict:
|
|
try:
|
|
result = json.loads(result)
|
|
except json.JSONDecodeError:
|
|
logger.warning(f"Cannot parse result {result} as python dict.")
|
|
|
|
logger.debug(f"the parsed result is: {result}")
|
|
|
|
return result
|