Card snapshot

asiai.dev · 2026-05-23 01:44:52 UTC · 5c4137ec905ff12ea946bdd5a74e50612027b3333d2e4ee924e1f35c3c766415
This is a frozen copy of the agent's agent-card.json as observed at the timestamp above. A new snapshot is captured every time the card's content hash changes. Snapshots are useful for forensic drift analysis, for verifying that downstream callers see the right version, and for reproducing routing decisions made historically.
{
  "name": "asiai",
  "description": "Apple Silicon LLM inference benchmark and monitoring agent. Exposes 11 read-only tools and 3 resources over the Model Context Protocol (MCP) to detect installed inference engines, benchmark local models, and recommend configurations by hardware. Runs locally (stdio) or over SSE/streamable-HTTP.",
  "url": "https://asiai.dev",
  "version": "1.6.0",
  "documentationUrl": "https://asiai.dev/commands/mcp/",
  "provider": {
    "organization": "asiai (Jean-Marc Nahlovsky / druide67)",
    "url": "https://asiai.dev"
  },
  "protocols": [
    "mcp"
  ],
  "mcpServerCard": "https://asiai.dev/mcp-server.json",
  "preferredTransport": "stdio",
  "supportedInterfaces": [
    {
      "url": "local://asiai-mcp",
      "transport": "stdio",
      "description": "MCP server over stdio \u2014 invoke via `asiai mcp`"
    },
    {
      "url": "http://127.0.0.1:8765/sse",
      "transport": "sse",
      "description": "MCP server over Server-Sent Events \u2014 invoke via `asiai mcp --transport sse`"
    },
    {
      "url": "http://127.0.0.1:8765/mcp",
      "transport": "streamable-http",
      "description": "MCP server over streamable HTTP \u2014 invoke via `asiai mcp --transport streamable-http`"
    }
  ],
  "additionalInterfaces": [
    {
      "url": "https://asiai.dev/mcp-server.json",
      "transport": "http+mcp-card",
      "description": "MCP Server Card (static discovery manifest)"
    }
  ],
  "capabilities": {
    "streaming": true,
    "pushNotifications": false,
    "stateTransitionHistory": false
  },
  "defaultInputModes": [
    "text"
  ],
  "defaultOutputModes": [
    "text",
    "application/json"
  ],
  "skills": [
    {
      "id": "check-inference-health",
      "name": "Check Inference Health",
      "description": "Quick health check of all local LLM inference engines. Returns ok/degraded/error, memory pressure, thermal state, GPU. Responds in <500ms.",
      "tags": [
        "health",
        "monitoring",
        "apple-silicon"
      ],
      "examples": [
        "Is local LLM inference available right now?"
      ]
    },
    {
      "id": "list-models",
      "name": "List Loaded Models",
      "description": "List all models currently loaded across inference engines (VRAM, quantization, context length).",
      "tags": [
        "models",
        "inventory",
        "inference"
      ],
      "examples": [
        "What models are loaded right now?"
      ]
    },
    {
      "id": "detect-engines",
      "name": "Detect Inference Engines",
      "description": "Auto-detect running LLM inference engines (Ollama, LM Studio, mlx-lm, llama.cpp, vLLM-MLX, Exo, TurboQuant).",
      "tags": [
        "discovery",
        "engines",
        "apple-silicon"
      ],
      "examples": [
        "Which inference engines are installed on this Mac?"
      ]
    },
    {
      "id": "run-benchmark",
      "name": "Run Inference Benchmark",
      "description": "Benchmark a local model's performance (tok/s, TTFT, VRAM, power) with statistical rigour (CI 95%, P50/P90/P99). Supports multi-engine and cross-model comparison.",
      "tags": [
        "benchmark",
        "performance",
        "inference"
      ],
      "examples": [
        "Benchmark Qwen 3.6 on Ollama NVFP4",
        "Compare Qwen 3.5 vs 3.6 on this Mac"
      ]
    },
    {
      "id": "recommend-engine",
      "name": "Recommend Engine and Model",
      "description": "Hardware-aware engine+model recommendations optimized for throughput, latency, or power efficiency.",
      "tags": [
        "recommendation",
        "hardware",
        "inference"
      ],
      "examples": [
        "What's the fastest engine for my Mac?",
        "Which model fits my RAM?"
      ]
    },
    {
      "id": "compare-engines",
      "name": "Compare Engines",
      "description": "Side-by-side comparison of inference engines or models from benchmark history.",
      "tags": [
        "comparison",
        "benchmark",
        "analysis"
      ],
      "examples": [
        "Compare Ollama MLX vs LM Studio for Qwen 3.6"
      ]
    },
    {
      "id": "get-inference-snapshot",
      "name": "Full Inference Snapshot",
      "description": "Complete system + inference state: CPU load, memory, thermal, GPU, engines status, loaded models, recent activity.",
      "tags": [
        "snapshot",
        "monitoring",
        "system"
      ],
      "examples": [
        "Give me a full status report of local inference"
      ]
    },
    {
      "id": "diagnose",
      "name": "Run Diagnostics",
      "description": "Comprehensive diagnostic checks: Apple Silicon compat, engines health, DB integrity, daemon status, alerting config.",
      "tags": [
        "diagnostics",
        "troubleshooting"
      ],
      "examples": [
        "Diagnose why inference is failing"
      ]
    }
  ],
  "related": {
    "mcpServerCard": "https://asiai.dev/mcp-server.json",
    "agentSkills": "https://asiai.dev/.well-known/agent-skills.json",
    "apiCatalog": "https://asiai.dev/.well-known/api-catalog",
    "openapi": "https://asiai.dev/openapi.json",
    "llmsTxt": "https://asiai.dev/llms.txt"
  }
}
Cookies on Agenstry