Source code for fenn.agents.llm

import os
import time

# Provider name -> default API base URL.
# LLMClient.__init__ uses this to pick the endpoint when no base_url is passed,
# and _detect_provider scans it to map a user-supplied base_url back to a
# provider name. The last three entries point at localhost.
PROVIDERS = {
    "openrouter": "https://openrouter.ai/api/v1",
    "together": "https://api.together.xyz/v1",
    "groq": "https://api.groq.com/openai/v1",
    "fireworks": "https://api.fireworks.ai/inference/v1",
    "deepinfra": "https://api.deepinfra.com/v1/openai",
    "anyscale": "https://api.endpoints.anyscale.com/v1",
    "perplexity": "https://api.perplexity.ai",
    "openai": "https://api.openai.com/v1",
    "anthropic": "https://api.anthropic.com/v1",
    "gemini": "https://generativelanguage.googleapis.com/v1beta/openai",
    "mistral": "https://api.mistral.ai/v1",
    "cohere": "https://api.cohere.ai/compatibility/v1",
    "xai": "https://api.x.ai/v1",
    "deepseek": "https://api.deepseek.com/v1",
    "cerebras": "https://api.cerebras.ai/v1",
    "nvidia": "https://integrate.api.nvidia.com/v1",
    "ollama": "http://localhost:11434/v1",
    "lmstudio": "http://localhost:1234/v1",
    "llamacpp": "http://localhost:8080/v1",
}

# Provider name -> model identifier used when the caller does not pass `model`.
# NOTE(review): "anyscale" appears in PROVIDERS and ENV_KEYS but has no entry
# here, so LLMClient falls back to its hard-coded "gpt-4o-mini" for that
# provider -- confirm whether that is intended.
DEFAULT_MODELS = {
    "openrouter": "arcee-ai/trinity-large-preview:free",
    "together": "meta-llama/Llama-3-8b-chat-hf",
    "groq": "llama-3.1-8b-instant",
    "fireworks": "accounts/fireworks/models/llama-v3p1-8b-instruct",
    "deepinfra": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "perplexity": "llama-3.1-sonar-small-128k-online",
    "openai": "gpt-4o-mini",
    "anthropic": "claude-3-5-haiku-20241022",
    "gemini": "gemini-2.0-flash",
    "mistral": "mistral-small-latest",
    "cohere": "command-r-plus",
    "xai": "grok-beta",
    "deepseek": "deepseek-chat",
    "cerebras": "llama3.1-8b",
    "nvidia": "meta/llama-3.1-8b-instruct",
    "ollama": "llama3",
    "lmstudio": "local-model",
    "llamacpp": "local-model",
}

# Provider name -> name of the environment variable holding its API key.
# LLMClient._resolve_key reads this when neither `api_key` nor `api_key_env`
# is given. None marks providers that have no key variable (the local ones).
ENV_KEYS = {
    "openrouter": "OPENROUTER_API_KEY",
    "together": "TOGETHER_API_KEY",
    "groq": "GROQ_API_KEY",
    "fireworks": "FIREWORKS_API_KEY",
    "deepinfra": "DEEPINFRA_API_KEY",
    "anyscale": "ANYSCALE_API_KEY",
    "perplexity": "PERPLEXITY_API_KEY",
    "openai": "OPENAI_API_KEY",
    "anthropic": "ANTHROPIC_API_KEY",
    "gemini": "GEMINI_API_KEY",
    "mistral": "MISTRAL_API_KEY",
    "cohere": "COHERE_API_KEY",
    "xai": "XAI_API_KEY",
    "deepseek": "DEEPSEEK_API_KEY",
    "cerebras": "CEREBRAS_API_KEY",
    "nvidia": "NVIDIA_API_KEY",
    "ollama": None,
    "lmstudio": None,
    "llamacpp": None,
}

# Providers for which LLMClient._resolve_key skips the environment lookup and
# returns the placeholder key "local".
LOCAL_PROVIDERS = {"ollama", "lmstudio", "llamacpp"}


def _detect_provider(provider, model, base_url):
    if provider:
        return provider
    if base_url:
        for name, url in PROVIDERS.items():
            if url in base_url:
                return name
        return "openrouter"
    if model:
        if model.startswith(("gpt-", "o1-", "o3-", "o4-")):
            return "openai"
        if model.startswith("gemini"):
            return "gemini"
        if model.startswith("claude"):
            return "anthropic"
        if model.startswith(("mistral", "codestral")):
            return "mistral"
        if model.startswith("command"):
            return "cohere"
        if model.startswith("grok"):
            return "xai"
        if model.startswith("deepseek"):
            return "deepseek"
        if model.startswith(("llama", "mixtral")):
            return "groq"
        if "/" in model:
            return "openrouter"
    return "openrouter"


class LLMClient:
    """
    Unified LLM client supporting all major providers via an OpenAI-compatible API.

    Parameters
    ----------
    provider : str, optional
        Provider name (e.g. "openai", "anthropic", "openrouter", "ollama").
        Auto-detected from model name or base_url when omitted.
    model : str, optional
        Model identifier. Defaults to the provider's recommended default.
    api_key : str, optional
        API key. Takes priority over api_key_env and environment lookup.
    api_key_env : str, optional
        Environment variable name to read the API key from.
        Overrides the provider's default env var (e.g. OPENROUTER_API_KEY).
    base_url : str, optional
        Custom API base URL. Overrides the provider's default endpoint.

    Raises
    ------
    ValueError
        If an API key is required for the resolved provider but the relevant
        environment variable is unset or empty.
    """

    def __init__(
        self, provider=None, model=None, api_key=None, api_key_env=None, base_url=None
    ):
        self.provider = _detect_provider(provider, model, base_url)
        self.model = model or DEFAULT_MODELS.get(self.provider, "gpt-4o-mini")
        self.base_url = base_url or PROVIDERS.get(
            self.provider, PROVIDERS["openrouter"]
        )
        self.api_key = self._resolve_key(api_key, api_key_env)

    def _resolve_key(self, api_key, api_key_env):
        """
        Pick the API key to use for ``self.provider``.

        Priority: explicit ``api_key``; the placeholder ``"local"`` for local
        providers; the variable named by ``api_key_env`` (or the provider's
        default from ``ENV_KEYS``); finally ``"local"`` when no variable name
        is known for the provider.

        Raises
        ------
        ValueError
            If a variable name is known but it is unset or empty.
        """
        if api_key:
            return api_key
        if self.provider in LOCAL_PROVIDERS:
            return "local"

        env_var = api_key_env or ENV_KEYS.get(self.provider)
        if not env_var:
            return "local"

        key = os.environ.get(env_var)
        if not key:
            raise ValueError(
                f"[fenn] API key not found for provider '{self.provider}'.\n"
                f"Set {env_var} in your .env file or pass api_key='...' to LLMClient()."
            )
        return key

    def _openai_client(self):
        """
        Build an ``openai.OpenAI`` client bound to this instance's key and URL.

        Raises
        ------
        ImportError
            If the ``openai`` package is not installed.
        """
        # Only the import is guarded, so an error raised while constructing the
        # client is not mis-reported as a missing package.
        try:
            from openai import OpenAI
        except ImportError as exc:
            raise ImportError(
                "[fenn] 'openai' package not found. Run: pip install openai"
            ) from exc
        return OpenAI(api_key=self.api_key or "local", base_url=self.base_url)

    @staticmethod
    def _with_schema_instruction(messages, schema):
        """
        Return a copy of ``messages`` whose last entry asks for schema-shaped JSON.

        The caller's list, its message dicts and any list-valued ``content``
        are left untouched.

        Parameters
        ----------
        messages : list of dict
            Messages in OpenAI format; must not be empty.
        schema : pydantic.BaseModel
            Class exposing ``model_json_schema()``.

        Raises
        ------
        ValueError
            If ``messages`` is empty.
        """
        import json

        if not messages:
            raise ValueError(
                "[fenn] 'messages' must contain at least one message when a schema is given."
            )

        # Serialise as real JSON: the dict's Python repr (single quotes,
        # True/None) would contradict the "valid JSON" instruction below.
        instruction = (
            "\n\nRespond ONLY with a valid JSON object matching this schema:\n"
            f"{json.dumps(schema.model_json_schema())}\n"
            "Do not include any text outside the JSON object."
        )

        msgs = [dict(m) for m in messages]
        last = msgs[-1]
        content = last.get("content")
        if isinstance(content, list):
            # Content given as a list of parts: add a text part to a new list.
            # `+=` here would extend the caller's list one character at a time.
            last["content"] = [*content, {"type": "text", "text": instruction.lstrip()}]
        else:
            last["content"] = f"{content or ''}{instruction}"
        return msgs

    def chat_complete(self, messages, schema=None, retries=3):
        """
        Call the chat completions API with a list of message dicts.

        Parameters
        ----------
        messages : list of dict
            Messages in OpenAI format: [{"role": "user", "content": "..."}].
        schema : pydantic.BaseModel, optional
            If provided, instructs the model to return JSON matching this schema.
        retries : int
            Total number of attempts made when rate limit errors occur.
            Values below 1 are treated as 1 so the request is always sent.

        Returns
        -------
        str or pydantic.BaseModel

        Raises
        ------
        ImportError
            If the ``openai`` package is not installed.
        ValueError
            If ``schema`` is given and ``messages`` is empty.
        openai.RateLimitError
            If the final attempt is still rate limited.
        """
        import json

        try:
            from openai import RateLimitError
        except ImportError as exc:
            raise ImportError(
                "[fenn] 'openai' package not found. Run: pip install openai"
            ) from exc

        client = self._openai_client()

        if schema:
            msgs = self._with_schema_instruction(messages, schema)
        else:
            # Shallow-copy to avoid handing the caller's dicts to the SDK.
            msgs = [dict(m) for m in messages]

        kwargs = dict(model=self.model, messages=msgs)
        if schema:
            kwargs["response_format"] = {"type": "json_object"}

        # Always make at least one attempt; previously retries <= 0 skipped
        # the request and returned None.
        attempts = max(1, retries)
        for attempt in range(attempts):
            try:
                response = client.chat.completions.create(**kwargs)
            except RateLimitError:
                if attempt >= attempts - 1:
                    raise
                # Linear back-off: 5s, 10s, 15s, ...
                wait = 5 * (attempt + 1)
                print(
                    f"[fenn] rate limit hit, retrying in {wait}s... ({attempt + 1}/{attempts})"
                )
                time.sleep(wait)
                continue

            text = response.choices[0].message.content
            if schema:
                return schema.model_validate(json.loads(text))
            return text

    def ask(self, prompt, schema=None, retries=3):
        """
        Send a single prompt and return the response.

        Parameters
        ----------
        prompt : str
            The user message to send.
        schema : pydantic.BaseModel, optional
            If provided, validates the response against this schema.
        retries : int
            Retry attempts on rate limit errors.

        Returns
        -------
        str or pydantic.BaseModel
        """
        return self.chat_complete(
            [{"role": "user", "content": prompt}], schema=schema, retries=retries
        )

    def stream(self, prompt):
        """
        Send a prompt and yield response tokens one by one.

        Parameters
        ----------
        prompt : str
            The user message to send.

        Yields
        ------
        str
            Non-empty text deltas from the streamed LLM response.
        """
        client = self._openai_client()
        response = client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            stream=True,
        )
        for chunk in response:
            # Skip chunks that have no choices.
            if not hasattr(chunk, "choices") or not chunk.choices:
                continue
            delta = getattr(chunk.choices[0].delta, "content", "") or ""
            if delta:
                yield delta