from typing import Any, Dict, Iterator

from commons.models.internal.base import BaseGenerativeModel
from commons.models.response import Response


class MistralAPIInternalLLM(BaseGenerativeModel):
    """
    Mistral API implementation for v200.

    This class contains:
    - the backend configuration (client, model)
    - generate(): synchronous call
    - stream_generate(): streaming (SSE) call
    - response normalisation
    """

    def __init__(self, client, model_name: str):
        """
        Store the backend client and the name of the model to query.

        Args:
            client: Backend client; must expose ``chat.completions.create``.
            model_name: Model identifier forwarded as ``model=`` on each call.
        """
        # NOTE(review): BaseGenerativeModel.__init__ is not called here —
        # confirm this is intentional.
        self.client = client
        self.model_name = model_name

    @staticmethod
    def _extract_text(raw: Any) -> Any:
        """
        Return the content of the first choice's message found in *raw*.

        Both mapping-style (``raw["choices"]``) and attribute-style
        (``raw.choices``) responses are supported, so that this method agrees
        with ``stream_generate``, which reads chunks by attribute. When
        neither shape matches, ``str(raw)`` is returned as a best-effort
        fallback.
        """
        shape_errors = (LookupError, TypeError, AttributeError)

        # Mapping shape first: keeps the historical behaviour for dict payloads.
        try:
            return raw["choices"][0]["message"]["content"]
        except shape_errors:
            pass

        # Object shape: same access style as the streaming path.
        try:
            return raw.choices[0].message.content
        except shape_errors:
            return str(raw)

    def generate(self, prompt: str, **kwargs) -> Response:
        """
        Synchronous call: one complete request, no streaming.

        The exact format of the raw response depends on the backend.

        Args:
            prompt: Text sent as the single ``user`` message.
            **kwargs: Extra keyword arguments forwarded unchanged to
                ``client.chat.completions.create``.

        Returns:
            A ``Response`` whose ``response`` is the extracted text (or
            ``str(raw)`` when the payload shape is not recognised) and whose
            ``raw_response`` is the untouched backend payload.
        """
        raw = self.client.chat.completions.create(
            model=self.model_name,
            messages=[{"role": "user", "content": prompt}],
            stream=False,
            **kwargs,
        )

        return Response(
            response=self._extract_text(raw),
            raw_response=raw,
            source_nodes=None,
            metadata={},
        )

    def stream_generate(self, prompt: str, **kwargs) -> Iterator[Dict[str, Any]]:
        """
        Yield the completion as a stream of chunks.

        The exact chunk format depends on the backend implementation.

        Internal v200 convention:
        - intermediate chunks: {"delta": "..."}
        - final chunk: {"delta": "", "meta": {...}}

        One intermediate chunk is yielded per backend chunk, with an empty
        ``delta`` when the backend chunk carries no text.

        Args:
            prompt: Text sent as the single ``user`` message.
            **kwargs: Extra keyword arguments forwarded unchanged to
                ``client.chat.completions.create``.
        """
        stream = self.client.chat.completions.create(
            model=self.model_name,
            messages=[{"role": "user", "content": prompt}],
            stream=True,
            **kwargs,
        )

        # Last truthy ``message`` seen on a first choice; stays None if the
        # backend never sends one.
        final_message = None

        for chunk in stream:
            delta = ""

            if chunk.choices:
                choice = chunk.choices[0]

                delta_obj = getattr(choice, "delta", None)
                content = getattr(delta_obj, "content", None) if delta_obj else None
                if content:
                    delta = content

                message = getattr(choice, "message", None)
                if message:
                    final_message = message

            yield {"delta": delta}

        # Terminal chunk: empty delta plus the metadata envelope.
        yield {
            "delta": "",
            "meta": {
                "raw_response": final_message,
                "nodes": None,
                "metadata": {},
            },
        }