Compare commits

...

6 Commits

Author SHA1 Message Date
MrRaph_
a25e166c14 . 2025-03-27 08:57:24 +01:00
MrRaph_
db470a5d28 . 2025-03-27 08:57:04 +01:00
MrRaph_
5872e0b288 fix: if no agenda/reminder should send original transcript 2025-03-26 08:03:30 +01:00
MrRaph_
698ebf3bef fixed mime types 2025-03-26 08:01:57 +01:00
MrRaph_
63b9d0c3db feat: excluding .mbp files 2024-12-13 15:30:40 +01:00
MrRaph_
5ba28d859c feat: checking file mime type before sending it to Whisper 2024-12-13 15:29:42 +01:00
3 changed files with 38 additions and 14 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
*.mbp

View File

@@ -1,6 +1,6 @@
maubot: 0.1.0
id: nigzu.com.maubot-stt
version: 0.3.0
version: 0.3.4
license: MIT
modules:
- openai-whisper

View File

@@ -9,9 +9,15 @@ from mautrix.client import Client
from maubot.handlers import event
from maubot import Plugin, MessageEvent
from mautrix.errors import MatrixRequestError
from mautrix.types import EventType, MessageType, RelationType, TextMessageEventContent, Format,RelatesTo,InReplyTo
from mautrix.types import (
EventType, MessageType, RelationType,
TextMessageEventContent, Format, RelatesTo, InReplyTo
)
from mautrix.util.config import BaseProxyConfig, ConfigUpdateHelper
ALLOWED_EXTENSIONS = ['flac', 'm4a', 'mp3', 'mp4', 'mpeg', 'mpga', 'oga', 'ogg', 'wav', 'webm']
ALLOWED_MIME_TYPES = ['audio/flac', 'audio/mp4', 'video/mpeg', 'audio/ogg', 'audio/wav', 'video/webm']
class Config(BaseProxyConfig):
def do_update(self, helper: ConfigUpdateHelper) -> None:
helper.copy("whisper_endpoint")
@@ -46,7 +52,14 @@ class WhisperPlugin(Plugin):
if self.allowed_rooms and event.room_id not in self.allowed_rooms:
return False
return event.content.msgtype == MessageType.AUDIO or event.content.msgtype == MessageType.FILE
# Extract the main part of the MIME type (before any optional parameters)
mime_type = ""
if event.content.info and event.content.info.mimetype:
mime_type = event.content.info.mimetype.split(";")[0]
if mime_type not in ALLOWED_MIME_TYPES:
return False
return event.content.msgtype in (MessageType.AUDIO, MessageType.FILE)
@event.on(EventType.ROOM_MESSAGE)
async def on_message(self, event: MessageEvent) -> None:
@@ -57,13 +70,23 @@ class WhisperPlugin(Plugin):
await event.mark_read()
await self.client.set_typing(event.room_id, timeout=99999)
self.log.error(event)
audio_bytes = await self.client.download_media(url=event.content.url)
transcription = await self.transcribe_audio(audio_bytes)
if not audio_bytes:
await event.respond("Erreur lors du téléchargement du fichier audio.")
return
# Get the filename if it is set, otherwise fall back to a default value
filename = getattr(event.content, "filename", "audio.mp3")
# Use the MIME type as-is, or a default value
mime_type = event.content.info.mimetype if event.content.info and event.content.info.mimetype else "audio/mpeg"
transcription = await self.transcribe_audio(audio_bytes, filename, mime_type)
# If the study step is enabled, use its result only when it returns a command (!rappel or !agenda)
if self.search_reminders_and_events:
transcription = await self.study_transcribe(transcription)
studied = await self.study_transcribe(transcription)
if studied.startswith("!rappel") or studied.startswith("!agenda"):
transcription = studied
await self.client.set_typing(event.room_id, timeout=0)
content = TextMessageEventContent(
@@ -85,25 +108,25 @@ class WhisperPlugin(Plugin):
self.log.exception(f"Something went wrong: {e}")
await event.respond(f"Something went wrong: {e}")
async def transcribe_audio(self, audio_bytes: bytes) -> str:
async def transcribe_audio(self, audio_bytes: bytes, filename: str = "audio.mp3", mime_type: str = "audio/mpeg") -> str:
headers = {
"Authorization": f"Bearer {self.api_key}"
}
data = aiohttp.FormData()
data.add_field('file', audio_bytes, filename='audio.mp3', content_type='audio/mpeg')
data.add_field('file', audio_bytes, filename=filename, content_type=mime_type)
data.add_field('model', 'whisper-1')
if self.prompt:
data.add_field('prompt', f"{self.prompt}")
data.add_field('prompt', self.prompt)
if self.language:
data.add_field('language', f"{self.language}")
data.add_field('language', self.language)
try:
async with aiohttp.ClientSession() as session:
async with session.post(self.whisper_endpoint, headers=headers, data=data) as response:
if response.status != 200:
self.log.error(f"Error response from API: {await response.text()}")
return f"Error: {await response.text()}"
error_text = await response.text()
self.log.error(f"Error response from API: {error_text}")
return f"Error: {error_text}"
response_json = await response.json()
return response_json.get("text", "Sorry, I can't transcribe the audio.")
except Exception as e: