Compare commits

...

4 Commits

Author SHA1 Message Date
MrRaph_
a25e166c14 . 2025-03-27 08:57:24 +01:00
MrRaph_
db470a5d28 . 2025-03-27 08:57:04 +01:00
MrRaph_
5872e0b288 fix: if no agenda/reminder should send original transcript 2025-03-26 08:03:30 +01:00
MrRaph_
698ebf3bef fixed mime types 2025-03-26 08:01:57 +01:00
2 changed files with 34 additions and 15 deletions

View File

@@ -1,6 +1,6 @@
maubot: 0.1.0 maubot: 0.1.0
id: nigzu.com.maubot-stt id: nigzu.com.maubot-stt
version: 0.3.1 version: 0.3.4
license: MIT license: MIT
modules: modules:
- openai-whisper - openai-whisper

View File

@@ -9,11 +9,14 @@ from mautrix.client import Client
from maubot.handlers import event from maubot.handlers import event
from maubot import Plugin, MessageEvent from maubot import Plugin, MessageEvent
from mautrix.errors import MatrixRequestError from mautrix.errors import MatrixRequestError
from mautrix.types import EventType, MessageType, RelationType, TextMessageEventContent, Format,RelatesTo,InReplyTo from mautrix.types import (
EventType, MessageType, RelationType,
TextMessageEventContent, Format, RelatesTo, InReplyTo
)
from mautrix.util.config import BaseProxyConfig, ConfigUpdateHelper from mautrix.util.config import BaseProxyConfig, ConfigUpdateHelper
ALLOWED_EXTENSIONS = ['flac', 'm4a', 'mp3', 'mp4', 'mpeg', 'mpga', 'oga', 'ogg', 'wav', 'webm'] ALLOWED_EXTENSIONS = ['flac', 'm4a', 'mp3', 'mp4', 'mpeg', 'mpga', 'oga', 'ogg', 'wav', 'webm']
ALLOWED_MIME_TYPES = ['audio/flac','audio/mp4','video/mpeg','audio/ogg','audio/wav','video/webm'] ALLOWED_MIME_TYPES = ['audio/flac', 'audio/mp4', 'video/mpeg', 'audio/ogg', 'audio/wav', 'video/webm']
class Config(BaseProxyConfig): class Config(BaseProxyConfig):
def do_update(self, helper: ConfigUpdateHelper) -> None: def do_update(self, helper: ConfigUpdateHelper) -> None:
@@ -48,11 +51,15 @@ class WhisperPlugin(Plugin):
if self.allowed_rooms and event.room_id not in self.allowed_rooms: if self.allowed_rooms and event.room_id not in self.allowed_rooms:
return False return False
if event.content.info.mimetype not in ALLOWED_MIME_TYPES: # Extraction de la partie principale du MIME type (avant les éventuels paramètres)
mime_type = ""
if event.content.info and event.content.info.mimetype:
mime_type = event.content.info.mimetype.split(";")[0]
if mime_type not in ALLOWED_MIME_TYPES:
return False return False
return event.content.msgtype == MessageType.AUDIO or event.content.msgtype == MessageType.FILE return event.content.msgtype in (MessageType.AUDIO, MessageType.FILE)
@event.on(EventType.ROOM_MESSAGE) @event.on(EventType.ROOM_MESSAGE)
async def on_message(self, event: MessageEvent) -> None: async def on_message(self, event: MessageEvent) -> None:
@@ -64,10 +71,22 @@ class WhisperPlugin(Plugin):
await self.client.set_typing(event.room_id, timeout=99999) await self.client.set_typing(event.room_id, timeout=99999)
audio_bytes = await self.client.download_media(url=event.content.url) audio_bytes = await self.client.download_media(url=event.content.url)
transcription = await self.transcribe_audio(audio_bytes) if not audio_bytes:
await event.respond("Erreur lors du téléchargement du fichier audio.")
return
# Récupère le nom de fichier s'il est défini, sinon utilise une valeur par défaut
filename = getattr(event.content, "filename", "audio.mp3")
# Utilise le MIME type tel quel, ou une valeur par défaut
mime_type = event.content.info.mimetype if event.content.info and event.content.info.mimetype else "audio/mpeg"
transcription = await self.transcribe_audio(audio_bytes, filename, mime_type)
# Si l'étude est activée, on utilise son résultat uniquement si celui-ci renvoie une commande
if self.search_reminders_and_events: if self.search_reminders_and_events:
transcription = await self.study_transcribe(transcription) studied = await self.study_transcribe(transcription)
if studied.startswith("!rappel") or studied.startswith("!agenda"):
transcription = studied
await self.client.set_typing(event.room_id, timeout=0) await self.client.set_typing(event.room_id, timeout=0)
content = TextMessageEventContent( content = TextMessageEventContent(
@@ -89,25 +108,25 @@ class WhisperPlugin(Plugin):
self.log.exception(f"Something went wrong: {e}") self.log.exception(f"Something went wrong: {e}")
await event.respond(f"Something went wrong: {e}") await event.respond(f"Something went wrong: {e}")
async def transcribe_audio(self, audio_bytes: bytes, filename: str = "audio.mp3", mime_type: str = "audio/mpeg") -> str:
async def transcribe_audio(self, audio_bytes: bytes) -> str:
headers = { headers = {
"Authorization": f"Bearer {self.api_key}" "Authorization": f"Bearer {self.api_key}"
} }
data = aiohttp.FormData() data = aiohttp.FormData()
data.add_field('file', audio_bytes, filename='audio.mp3', content_type='audio/mpeg') data.add_field('file', audio_bytes, filename=filename, content_type=mime_type)
data.add_field('model', 'whisper-1') data.add_field('model', 'whisper-1')
if self.prompt: if self.prompt:
data.add_field('prompt', f"{self.prompt}") data.add_field('prompt', self.prompt)
if self.language: if self.language:
data.add_field('language', f"{self.language}") data.add_field('language', self.language)
try: try:
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession() as session:
async with session.post(self.whisper_endpoint, headers=headers, data=data) as response: async with session.post(self.whisper_endpoint, headers=headers, data=data) as response:
if response.status != 200: if response.status != 200:
self.log.error(f"Error response from API: {await response.text()}") error_text = await response.text()
return f"Error: {await response.text()}" self.log.error(f"Error response from API: {error_text}")
return f"Error: {error_text}"
response_json = await response.json() response_json = await response.json()
return response_json.get("text", "Sorry, I can't transcribe the audio.") return response_json.get("text", "Sorry, I can't transcribe the audio.")
except Exception as e: except Exception as e: