{"domain":"asiai.dev","count":1,"changes":[{"captured_at":"2026-05-23T01:44:52","card_hash":"5c4137ec905ff12ea946bdd5a74e50612027b3333d2e4ee924e1f35c3c766415","previous_card_hash":null,"diff":{"skills_added":[{"id":"check-inference-health","name":"Check Inference Health","description":"Quick health check of all local LLM inference engines. Returns ok/degraded/error, memory pressure, thermal state, GPU. Responds in <500ms.","tags":["health","monitoring","apple-silicon"],"inputModes":null,"outputModes":null},{"id":"compare-engines","name":"Compare Engines","description":"Side-by-side comparison of inference engines or models from benchmark history.","tags":["comparison","benchmark","analysis"],"inputModes":null,"outputModes":null},{"id":"detect-engines","name":"Detect Inference Engines","description":"Auto-detect running LLM inference engines (Ollama, LM Studio, mlx-lm, llama.cpp, vLLM-MLX, Exo, TurboQuant).","tags":["discovery","engines","apple-silicon"],"inputModes":null,"outputModes":null},{"id":"diagnose","name":"Run Diagnostics","description":"Comprehensive diagnostic checks: Apple Silicon compat, engines health, DB integrity, daemon status, alerting config.","tags":["diagnostics","troubleshooting"],"inputModes":null,"outputModes":null},{"id":"get-inference-snapshot","name":"Full Inference Snapshot","description":"Complete system + inference state: CPU load, memory, thermal, GPU, engines status, loaded models, recent activity.","tags":["snapshot","monitoring","system"],"inputModes":null,"outputModes":null},{"id":"list-models","name":"List Loaded Models","description":"List all models currently loaded across inference engines (VRAM, quantization, context length).","tags":["models","inventory","inference"],"inputModes":null,"outputModes":null},{"id":"recommend-engine","name":"Recommend Engine and Model","description":"Hardware-aware engine+model recommendations optimized for throughput, latency, or power efficiency.","tags":["recommendation","hardware","inference"],"inputModes":null,"outputModes":null},{"id":"run-benchmark","name":"Run Inference Benchmark","description":"Benchmark a local model's performance (tok/s, TTFT, VRAM, power) with statistical rigour (CI 95%, P50/P90/P99). Supports multi-engine and cross-model comparison.","tags":["benchmark","performance","inference"],"inputModes":null,"outputModes":null}],"skills_removed":[],"skills_changed":[],"fields_changed":[{"field":"name","before":null,"after":"asiai"},{"field":"description","before":null,"after":"Apple Silicon LLM inference benchmark and monitoring agent. Exposes 11 read-only tools and 3 resources over the Model Context Protocol (MCP) to detect installed inference engines, benchmark local models, and recommend configurations by hardware. Runs locally (stdio) or over SSE/streamable-HTTP."},{"field":"version","before":null,"after":"1.6.0"},{"field":"url","before":null,"after":"https://asiai.dev"},{"field":"documentationUrl","before":null,"after":"https://asiai.dev/commands/mcp/"},{"field":"preferredTransport","before":null,"after":"stdio"}],"other_changed":true,"is_empty":false,"human_summary":"added 8 skills · name ∅ → asiai · description ∅ → Apple Silicon LLM inference benchmark an · version ∅ → 1.6.0 · url ∅ → https://asiai.dev · documentationUrl ∅ → https://asiai.dev/commands/mcp/ · preferredTransport ∅ → stdio"}}]}