feat: add transcription analysis to create reminders/agendas
@@ -14,5 +14,8 @@ allowed_rooms: []
 # Optional text to guide the model's style or to continue from a previous audio segment. The prompt should match the language of the audio.
 prompt: "以下是普通话录制的会议记录:"
 
+# Optional: should OpenAI re-read the transcript to detect reminders and events?
+search_reminders_and_events: false
+
 # The language of the input audio. Providing the input language in ISO-639-1 format (Chinese: "zh") will improve accuracy and latency; leave empty by default.
 language:
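+
+# Example (hypothetical): with search_reminders_and_events: true, a voice message
+# saying "Rappelle-moi demain d'appeler Paul" would be posted back as
+# "!rappel demain appeler Paul" instead of the raw transcription.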
@@ -1,6 +1,6 @@
 maubot: 0.1.0
 id: nigzu.com.maubot-stt
-version: 0.2.6
+version: 0.3.0
 license: MIT
 modules:
 - openai-whisper
@@ -19,6 +19,7 @@ class Config(BaseProxyConfig):
         helper.copy("allowed_users")
         helper.copy("allowed_rooms")
         helper.copy("prompt")
+        helper.copy("search_reminders_and_events")
         helper.copy("language")
 
 class WhisperPlugin(Plugin):
@@ -32,6 +33,7 @@ class WhisperPlugin(Plugin):
         self.language = self.config['language']
         self.allowed_users = self.config['allowed_users']
         self.allowed_rooms = self.config['allowed_rooms']
+        self.search_reminders_and_events = self.config['search_reminders_and_events']
         self.log.debug("Whisper plugin started")
 
     async def should_respond(self, event: MessageEvent) -> bool:
@@ -55,11 +57,15 @@ class WhisperPlugin(Plugin):
         await event.mark_read()
         await self.client.set_typing(event.room_id, timeout=99999)
 
         self.log.debug(event)
 
         audio_bytes = await self.client.download_media(url=event.content.url)
         transcription = await self.transcribe_audio(audio_bytes)
 
-        await self.client.set_typing(event.room_id, timeout=0)
+        if self.search_reminders_and_events:
+            transcription = await self.study_transcribe(transcription)
+
+        await self.client.set_typing(event.room_id, timeout=0)
         content = TextMessageEventContent(
             msgtype=MessageType.TEXT,
             body=transcription,
@@ -104,6 +110,40 @@ class WhisperPlugin(Plugin):
             self.log.exception(f"Failed to transcribe audio, msg: {e}")
             return "Sorry, an error occurred while transcribing the audio."
 
+    async def study_transcribe(self, transcription: str) -> str:
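+        # Ask the chat model whether the transcription is a request to create
+        # a reminder or an event; if so, rewrite it as a !rappel / !agenda
+        # command, otherwise return the transcription unchanged.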
+        prompt = f"""
+        Voici la transcription du message vocal :
+
+        {transcription}
+
+        Ton objectif est d'analyser cette transcription afin de déterminer si l'utilisateur tente de créer un rappel ou un évènement.
+
+        - Si l'utilisateur essaie de créer un rappel, la sortie doit prendre la forme :
+        !rappel <date> <message>
+        - Si l'utilisateur essaie de créer un évènement, la sortie doit prendre la forme :
+        !agenda ##ROOM## <message>
+        - Si l'utilisateur ne cherche ni à créer un rappel ni un évènement, renvoie seulement la transcription telle quelle, sans ajout d'explication, de texte supplémentaire ou de ponctuation superflue.
+
+        Ne fournis aucun autre texte ni explication dans ta réponse, uniquement la sortie demandée.
+        """
+
+        url = "https://api.openai.com/v1/chat/completions"
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {self.api_key}"
+        }
+
+        data = {
+            "model": "gpt-4",
+            "messages": [
+                {"role": "user", "content": prompt}
+            ],
+            "temperature": 0.7
+        }
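+        # NOTE: temperature 0.7 keeps some variability; a value closer to 0 might
+        # be preferable for deterministic command extraction.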
+
+        async with aiohttp.ClientSession() as session:
+            async with session.post(url, headers=headers, json=data) as response:
+                response_json = await response.json()
+                # Fall back to the raw transcription if 'choices' is missing (e.g. API error).
+                choices = response_json.get('choices')
+                if not choices:
+                    self.log.error(f"Unexpected completion response: {response_json}")
+                    return transcription
+                return choices[0].get('message', {}).get('content', transcription)
 
     @classmethod
     def get_config_class(cls) -> Type[BaseProxyConfig]:
         return Config