From 8477774b628c4f98e1c1ef64c6e4d931ad924a25 Mon Sep 17 00:00:00 2001 From: 100gle Date: Wed, 27 Sep 2023 16:53:34 +0800 Subject: [PATCH 01/24] feat: add basic OpenAI parser implementation --- backend/requirements-dev.txt | 3 +- backend/requirements.txt | 1 + backend/src/module/parser/openai.py | 84 +++++++++++++++++++++++++++++ backend/src/test/test_openai.py | 18 +++++++ 4 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 backend/src/test/test_openai.py diff --git a/backend/requirements-dev.txt b/backend/requirements-dev.txt index 8240495a..a65cdcbc 100644 --- a/backend/requirements-dev.txt +++ b/backend/requirements-dev.txt @@ -1,4 +1,5 @@ -r requirements.txt ruff black -pre-commit \ No newline at end of file +pre-commit +pytest \ No newline at end of file diff --git a/backend/requirements.txt b/backend/requirements.txt index 0ac360cd..3929ce80 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -26,3 +26,4 @@ python-multipart==0.0.6 sqlmodel==0.0.8 sse-starlette==1.6.5 semver==3.0.1 +openai==0.28.1 diff --git a/backend/src/module/parser/openai.py b/backend/src/module/parser/openai.py index 8b137891..1b77ef9a 100644 --- a/backend/src/module/parser/openai.py +++ b/backend/src/module/parser/openai.py @@ -1 +1,85 @@ +import asyncio +import logging +import openai + +DEFAULT_PROMPT = """\ +You will now play the role of a super assistant. +Your task is to extract structured data from unstructured text content and output it in JSON format. +If you are unable to extract any information, please leave the field empty. Do not fabricate data! + +the python structured data type is: + +```python +@dataclass +class Episode: + title_en: Optional[str] + title_zh: Optional[str] + title_jp: Optional[str] + season: int + season_raw: str + episode: int + sub: str + group: str + resolution: str + source: str +``` + +Example: + +``` +input: "【喵萌奶茶屋】★04月新番★[夏日重现/Summer Time Rendering][11][1080p][繁日双语][招募翻译]" +output: '{"group": "喵萌奶茶屋", "title_en": "Summer Time Rendering", "resolution": "1080p", "episode": 11, "season": 1}' + +input: "【幻樱字幕组】【4月新番】【古见同学有交流障碍症 第二季 Komi-san wa, Komyushou Desu. S02】【22】【GB_MP4】【1920X1080】" +output: '{"group": "幻樱字幕组", "title_en": "Komi-san wa, Komyushou Desu.", "resolution": "1920X1080", "episode": 22, "season": 2}' + +input: "[Lilith-Raws] 关于我在无意间被隔壁的天使变成废柴这件事 / Otonari no Tenshi-sama - 09 [Baha][WEB-DL][1080p][AVC AAC][CHT][MP4]" +output: '{"group": "Lilith-Raws", "title_en": "Otonari no Tenshi-sama", "resolution": "1080p", "episode": 9, "season": 1}' +``` +""" + + +class OpenAIParser: + def __init__( + self, + api_key: str, + api_base: str | None = None, + model: str | None = None, + **kwargs, + ) -> None: + if not api_key: + raise ValueError("API key is required.") + + self._api_key = api_key + self.api_base = api_base or "https://api.openai.com/v1" + self.model = model or "gpt-3.5-turbo" + self.openai_kwargs = kwargs + + def parse(self, text: str, prompt: str | None = None) -> str: + if not prompt: + prompt = DEFAULT_PROMPT + + async def complete() -> str: + resp = await openai.ChatCompletion.acreate( + api_key=self._api_key, + api_base=self.api_base, + model=self.model, + messages=[ + dict(role="system", content=prompt), + dict(role="user", content=text), + ], + # set temperature to 0 to make results be more stable and reproducible. + temperature=0, + **self.openai_kwargs, + ) + + result = resp["choices"][0]["message"]["content"] + return result + + loop = asyncio.get_event_loop() + result = loop.run_until_complete(complete()) + + logging.debug(f"the parsed result is: {result}") + + return result diff --git a/backend/src/test/test_openai.py b/backend/src/test/test_openai.py new file mode 100644 index 00000000..7320bcb0 --- /dev/null +++ b/backend/src/test/test_openai.py @@ -0,0 +1,18 @@ +import os + +from dotenv import load_dotenv +from module.parser.openai import OpenAIParser + +load_dotenv() + + +class TestOpenAIParser: + @classmethod + def setup_class(cls): + api_key = os.getenv("OPENAI_API_KEY") + cls.parser = OpenAIParser(api_key=api_key) + + def test_parse(self): + text = "[梦蓝字幕组]New Doraemon 哆啦A梦新番[747][2023.02.25][AVC][1080P][GB_JP][MP4]" + result = self.parser.parse(text=text) + assert result == "XXX" From 77201d5de2941d1e4a2605323f18b4d76d8a393b Mon Sep 17 00:00:00 2001 From: 100gle Date: Wed, 27 Sep 2023 17:01:14 +0800 Subject: [PATCH 02/24] chore: adjust default parameters and test assertion --- backend/src/module/parser/openai.py | 8 ++++---- backend/src/test/test_openai.py | 9 ++++++++- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/backend/src/module/parser/openai.py b/backend/src/module/parser/openai.py index 1b77ef9a..c172fac0 100644 --- a/backend/src/module/parser/openai.py +++ b/backend/src/module/parser/openai.py @@ -44,16 +44,16 @@ class OpenAIParser: def __init__( self, api_key: str, - api_base: str | None = None, - model: str | None = None, + api_base: str = "https://api.openai.com/v1", + model: str = "gpt-3.5-turbo", **kwargs, ) -> None: if not api_key: raise ValueError("API key is required.") self._api_key = api_key - self.api_base = api_base or "https://api.openai.com/v1" - self.model = model or "gpt-3.5-turbo" + self.api_base = api_base + self.model = model self.openai_kwargs = kwargs def parse(self, text: str, prompt: str | None = None) -> str: diff --git a/backend/src/test/test_openai.py b/backend/src/test/test_openai.py index 7320bcb0..ea40f865 100644 --- a/backend/src/test/test_openai.py +++ b/backend/src/test/test_openai.py @@ -1,3 +1,4 @@ +import json import os from dotenv import load_dotenv @@ -15,4 +16,10 @@ class TestOpenAIParser: def test_parse(self): text = "[梦蓝字幕组]New Doraemon 哆啦A梦新番[747][2023.02.25][AVC][1080P][GB_JP][MP4]" result = self.parser.parse(text=text) - assert result == "XXX" + assert json.loads(result) == { + "group": "梦蓝字幕组", + "title_en": "New Doraemon", + "resolution": "1080P", + "episode": 747, + "season": 1, + } From 9f834ecefce48b7db56258571bfb9573a405b301 Mon Sep 17 00:00:00 2001 From: 100gle Date: Wed, 27 Sep 2023 20:37:03 +0800 Subject: [PATCH 03/24] chore: update docs and use logger instance --- backend/src/module/parser/openai.py | 31 ++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/backend/src/module/parser/openai.py b/backend/src/module/parser/openai.py index c172fac0..4f184517 100644 --- a/backend/src/module/parser/openai.py +++ b/backend/src/module/parser/openai.py @@ -3,6 +3,8 @@ import logging import openai +logger = logging.getLogger(__name__) + DEFAULT_PROMPT = """\ You will now play the role of a super assistant. Your task is to extract structured data from unstructured text content and output it in JSON format. @@ -48,6 +50,22 @@ class OpenAIParser: model: str = "gpt-3.5-turbo", **kwargs, ) -> None: + """OpenAIParser is a class to parse text with openai + + Args: + api_key (str): the OpenAI api key + api_base (str): + the OpenAI api base url, you can use custom url here. \ + Defaults to "https://api.openai.com/v1". + model (str): + the ChatGPT model parameter, you can get more details from https://platform.openai.com/docs/api-reference/chat/create. \ + Defaults to "gpt-3.5-turbo". + kwargs (dict): + the OpenAI ChatGPT parameters, you can get more details from https://platform.openai.com/docs/api-reference/chat/create. + + Raises: + ValueError: if api_key is not provided. + """ if not api_key: raise ValueError("API key is required.") @@ -57,6 +75,17 @@ class OpenAIParser: self.openai_kwargs = kwargs def parse(self, text: str, prompt: str | None = None) -> str: + """parse text with openai + + Args: + text (str): the text to be parsed + prompt (str | None, optional): + the custom prompt. Built-in prompt will be used if no prompt is provided. \ + Defaults to None. + + Returns: + str: the parsed text. + """ if not prompt: prompt = DEFAULT_PROMPT @@ -80,6 +109,6 @@ class OpenAIParser: loop = asyncio.get_event_loop() result = loop.run_until_complete(complete()) - logging.debug(f"the parsed result is: {result}") + logger.debug(f"the parsed result is: {result}") return result From bfbf7aed560c46a91a0bc9d68a0fd1b6a821c6a3 Mon Sep 17 00:00:00 2001 From: 100gle Date: Wed, 27 Sep 2023 21:46:38 +0800 Subject: [PATCH 04/24] feat: add experimental openai configuration --- backend/src/module/models/config.py | 8 ++++ .../setting/config-experimental.vue | 45 +++++++++++++++++++ webui/src/i18n/zh-CN.json | 7 +++ webui/src/pages/index/config.vue | 2 + webui/types/config.ts | 13 ++++++ webui/types/dts/components.d.ts | 1 + 6 files changed, 76 insertions(+) create mode 100644 webui/src/components/setting/config-experimental.vue diff --git a/backend/src/module/models/config.py b/backend/src/module/models/config.py index df15546a..3358124b 100644 --- a/backend/src/module/models/config.py +++ b/backend/src/module/models/config.py @@ -81,6 +81,13 @@ class Notification(BaseModel): return expandvars(self.chat_id_) +class Experimental(BaseModel): + openai_enable: bool = Field(False, description="Enable experimental OpenAI") + openai_api_key: str = Field("", description="OpenAI api key") + openai_api_base: str = Field("", description="OpenAI api base url") + openai_model: str = Field("", description="OpenAI model") + + class Config(BaseModel): program: Program = Program() downloader: Downloader = Downloader() @@ -89,6 +96,7 @@ class Config(BaseModel): log: Log = Log() proxy: Proxy = Proxy() notification: Notification = Notification() + Experimental: Experimental = Experimental() def dict(self, *args, by_alias=True, **kwargs): return super().dict(*args, by_alias=by_alias, **kwargs) diff --git a/webui/src/components/setting/config-experimental.vue b/webui/src/components/setting/config-experimental.vue new file mode 100644 index 00000000..dc9dff27 --- /dev/null +++ b/webui/src/components/setting/config-experimental.vue @@ -0,0 +1,45 @@ + + + diff --git a/webui/src/i18n/zh-CN.json b/webui/src/i18n/zh-CN.json index 76f89613..2383cb8b 100644 --- a/webui/src/i18n/zh-CN.json +++ b/webui/src/i18n/zh-CN.json @@ -137,6 +137,13 @@ "username": "用户名", "password": "密码" }, + "experimental_set": { + "title": "实验功能设置", + "openai_enable": "启用 OpenAI", + "openai_api_key": "OpenAI API Key", + "openai_api_base": "OpenAI API Base URL", + "openai_model": "OpenAI 模型" + }, "media_player_set": { "title": "播放器设置", "type": "类型", diff --git a/webui/src/pages/index/config.vue b/webui/src/pages/index/config.vue index 55e6e183..afc5fd14 100644 --- a/webui/src/pages/index/config.vue +++ b/webui/src/pages/index/config.vue @@ -28,6 +28,8 @@ definePage({ + + diff --git a/webui/types/config.ts b/webui/types/config.ts index 12c19937..00e40f1d 100644 --- a/webui/types/config.ts +++ b/webui/types/config.ts @@ -47,6 +47,12 @@ export interface Config { token: string; chat_id: string; }; + experimental: { + openai_enable: boolean; + openai_api_key: string; + openai_api_base: string; + openai_model: string; + }; } export const initConfig: Config = { @@ -96,6 +102,12 @@ export const initConfig: Config = { token: '', chat_id: '', }, + experimental: { + openai_enable: false, + openai_api_key: '', + openai_api_base: '', + openai_model: 'gpt-3.5-turbo', + }, }; type getItem = Pick[T]; @@ -107,6 +119,7 @@ export type BangumiManage = getItem<'bangumi_manage'>; export type Log = getItem<'log'>; export type Proxy = getItem<'proxy'>; export type Notification = getItem<'notification'>; +export type Experimental = getItem<'experimental'>; /** 下载方式 */ export type DownloaderType = UnionToTuple; diff --git a/webui/types/dts/components.d.ts b/webui/types/dts/components.d.ts index cc313003..12def335 100644 --- a/webui/types/dts/components.d.ts +++ b/webui/types/dts/components.d.ts @@ -34,6 +34,7 @@ declare module '@vue/runtime-core' { AbTag: typeof import('./../../src/components/basic/ab-tag.vue')['default'] AbTopbar: typeof import('./../../src/components/layout/ab-topbar.vue')['default'] ConfigDownload: typeof import('./../../src/components/setting/config-download.vue')['default'] + ConfigExperimental: typeof import('./../../src/components/setting/config-experimental.vue')['default'] ConfigManage: typeof import('./../../src/components/setting/config-manage.vue')['default'] ConfigNormal: typeof import('./../../src/components/setting/config-normal.vue')['default'] ConfigNotification: typeof import('./../../src/components/setting/config-notification.vue')['default'] From 03304baea6f5e1c23ba5a5805ac5876ae7b85f17 Mon Sep 17 00:00:00 2001 From: 100gle Date: Thu, 28 Sep 2023 09:08:17 +0800 Subject: [PATCH 05/24] feat: add openai configuration to web ui --- backend/src/module/models/config.py | 6 +++--- backend/src/module/parser/openai.py | 6 ++++-- webui/types/config.ts | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/backend/src/module/models/config.py b/backend/src/module/models/config.py index 3358124b..51de420a 100644 --- a/backend/src/module/models/config.py +++ b/backend/src/module/models/config.py @@ -84,8 +84,8 @@ class Notification(BaseModel): class Experimental(BaseModel): openai_enable: bool = Field(False, description="Enable experimental OpenAI") openai_api_key: str = Field("", description="OpenAI api key") - openai_api_base: str = Field("", description="OpenAI api base url") - openai_model: str = Field("", description="OpenAI model") + openai_api_base: str = Field("https://api.openai.com/v1", description="OpenAI api base url") + openai_model: str = Field("gpt-3.5-turbo", description="OpenAI model") class Config(BaseModel): @@ -96,7 +96,7 @@ class Config(BaseModel): log: Log = Log() proxy: Proxy = Proxy() notification: Notification = Notification() - Experimental: Experimental = Experimental() + experimental: Experimental = Experimental() def dict(self, *args, by_alias=True, **kwargs): return super().dict(*args, by_alias=by_alias, **kwargs) diff --git a/backend/src/module/parser/openai.py b/backend/src/module/parser/openai.py index 4f184517..2ac051ca 100644 --- a/backend/src/module/parser/openai.py +++ b/backend/src/module/parser/openai.py @@ -58,10 +58,12 @@ class OpenAIParser: the OpenAI api base url, you can use custom url here. \ Defaults to "https://api.openai.com/v1". model (str): - the ChatGPT model parameter, you can get more details from https://platform.openai.com/docs/api-reference/chat/create. \ + the ChatGPT model parameter, you can get more details from \ + https://platform.openai.com/docs/api-reference/chat/create. \ Defaults to "gpt-3.5-turbo". kwargs (dict): - the OpenAI ChatGPT parameters, you can get more details from https://platform.openai.com/docs/api-reference/chat/create. + the OpenAI ChatGPT parameters, you can get more details from \ + https://platform.openai.com/docs/api-reference/chat/create. Raises: ValueError: if api_key is not provided. diff --git a/webui/types/config.ts b/webui/types/config.ts index 00e40f1d..180fff0b 100644 --- a/webui/types/config.ts +++ b/webui/types/config.ts @@ -105,7 +105,7 @@ export const initConfig: Config = { experimental: { openai_enable: false, openai_api_key: '', - openai_api_base: '', + openai_api_base: 'https://api.openai.com/v1/', openai_model: 'gpt-3.5-turbo', }, }; From 4d15c20ab5a9de23a4e1dbd8472a0c371b592071 Mon Sep 17 00:00:00 2001 From: 100gle Date: Thu, 28 Sep 2023 09:38:00 +0800 Subject: [PATCH 06/24] feat: add warning information for experimental settings --- webui/src/components/setting/config-experimental.vue | 5 +++++ webui/src/i18n/en.json | 10 +++++++++- webui/src/i18n/zh-CN.json | 5 +++-- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/webui/src/components/setting/config-experimental.vue b/webui/src/components/setting/config-experimental.vue index dc9dff27..3b0e9e54 100644 --- a/webui/src/components/setting/config-experimental.vue +++ b/webui/src/components/setting/config-experimental.vue @@ -1,4 +1,5 @@