Model selector — list, choose, and validate AI models against NPU capabilities.
Supports Ollama and OpenAI-compatible backends. For each model it evaluates
NPU compatibility and emits a human-readable warning when the model is unlikely
to run efficiently on the AMD Ryzen AI NPU.
All network calls respect the application's network.allow_external guard —
model listings are fetched only from the locally configured backend URL.
Example
selector = ModelSelector(config)
models = selector.list_models().result()  # list_models returns a Future
for m in models:
... warn = selector.npu_warning(m)
... print(m.name, "— WARNING:", warn if warn else "OK")
selector.set_model("llama3.2:3b-instruct-q4_K_M")
ModelInfo
dataclass
ModelInfo(name, size_bytes=0, family='', quantization='', is_vision=False, raw=dict())
Metadata about a single model available from the backend.
Attributes
name:
Model identifier as returned by the backend (e.g. "llama3:8b-q4_K_M").
size_bytes:
Raw model size in bytes as reported by the backend (0 if unknown).
family:
Model family string (e.g. "llama"), lower-cased.
quantization:
Quantization level string if detectable from the name (e.g. "q4_k_m").
is_vision:
True when the model is known to accept image inputs.
raw:
Full raw dict from the backend API for advanced use.
size_gb
property
Model size in gigabytes (0.0 if unknown).
ModelSelector
List, select, and validate models for the configured AI backend.
Parameters
config:
The application :class:~src.config.Config object.
Usage
::
selector = ModelSelector(config)
models = selector.list_models().result() # fetch from backend (resolve the Future)
current = selector.get_current_model() # from config
warning = selector.npu_warning(models[0]) # None → ok
selector.set_model("llama3.2:3b-q4_K_M") # update config in-memory
Source code in src/model_selector.py
def __init__(self, config: Any) -> None:  # noqa: ANN001
    """Initialize the selector with the application config.

    A dedicated single-worker thread pool is created so backend
    listing calls can run off the caller's thread.
    """
    # One worker is enough: model listing is an occasional, serial operation.
    self._executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
    self._config = config
list_models
Return a Future for all models available from the currently configured backend.
Network calls are made only to the locally configured backend URL.
Returns a Future containing an empty list (with a log warning) when the backend is
unreachable rather than raising an exception.
Parameters
timeout:
Seconds to wait for the backend to respond.
Returns
concurrent.futures.Future[list[ModelInfo]]
Future that resolves to models sorted alphabetically by name.
Source code in src/model_selector.py
def list_models(self, timeout: int = 10) -> concurrent.futures.Future[list[ModelInfo]]:
    """Return a Future for all models available from the currently configured backend.

    Network calls are made only to the locally configured backend URL.
    Returns a Future containing an empty list (with a log warning) when the backend is
    unreachable rather than raising an exception.

    Parameters
    ----------
    timeout:
        Seconds to wait for the backend to respond.

    Returns
    -------
    concurrent.futures.Future[list[ModelInfo]]
        Future that resolves to models sorted alphabetically by name.
    """

    def _fetch() -> list[ModelInfo]:
        backend = self._config.backend
        try:
            # Dispatch on the configured backend; unknown values are logged,
            # not raised, so the UI degrades gracefully.
            if backend == "ollama":
                models = self._list_ollama(timeout)
            elif backend == "openai":
                models = self._list_openai(timeout)
            elif backend == "npu":
                models = self._list_npu()
            else:
                logger.warning("Unknown backend %r; cannot list models.", backend)
                models = []
        except Exception as exc:  # noqa: BLE001
            # Backend unreachable or malformed response — warn and return empty.
            logger.warning("Could not list models from %r backend: %s", backend, exc)
            models = []
        return models

    return self._executor.submit(_fetch)
get_current_model
Return the model name currently configured for the active backend.
Returns an empty string if no model is configured.
Source code in src/model_selector.py
def get_current_model(self) -> str:
    """Return the model name currently configured for the active backend.

    Returns an empty string if no model is configured or the backend
    is not one of the known kinds.
    """
    backend = self._config.backend
    # The NPU backend stores a filesystem path rather than a model name.
    if backend == "npu":
        return self._config.npu.get("model_path", "")
    if backend == "ollama":
        return self._config.ollama.get("model", "")
    if backend == "openai":
        return self._config.openai.get("model", "")
    return ""
set_model
Update the in-memory config to use model_name for the active backend.
This does not persist to disk automatically — call
:meth:~src.settings.SettingsManager.save to write the change.
Parameters
model_name:
Model identifier accepted by the active backend.
Source code in src/model_selector.py
def set_model(self, model_name: str) -> None:
    """Update the in-memory config to use *model_name* for the active backend.

    This does **not** persist to disk automatically — call
    :meth:`~src.settings.SettingsManager.save` to write the change.

    Parameters
    ----------
    model_name:
        Model identifier accepted by the active backend.
    """
    backend = self._config.backend
    # Each backend stores its model selection under a different config key.
    if backend == "ollama":
        self._config._data["ollama"]["model"] = model_name
    elif backend == "openai":
        self._config._data["openai"]["model"] = model_name
    elif backend == "npu":
        self._config._data["npu"]["model_path"] = model_name
    else:
        # Bug fix: previously an unknown backend fell through and still
        # logged "Model updated ..." even though nothing was changed.
        logger.warning(
            "Unknown backend %r; model %r not updated.", backend, model_name
        )
        return
    logger.info("Model updated to %r (backend=%r)", model_name, backend)
npu_warning
Return a warning string if model is unlikely to work well on NPU.
Returns None when no issues are detected (the model should run
fine on the AMD Ryzen AI NPU or the current backend is not NPU).
Parameters
model:
A :class:ModelInfo instance or a bare model name string.
Returns
str | None
Human-readable warning, or None if no warning applies.
Source code in src/model_selector.py
def npu_warning(self, model: ModelInfo | str) -> str | None:
    """Return a warning string if *model* is unlikely to work well on NPU.

    Returns ``None`` when no issues are detected (the model should run
    fine on the AMD Ryzen AI NPU or the current backend is not NPU).

    Parameters
    ----------
    model:
        A :class:`ModelInfo` instance or a bare model name string.

    Returns
    -------
    str | None
        Human-readable warning, or ``None`` if no warning applies.
    """
    # Accept a bare name by wrapping it in a minimal ModelInfo.
    info = ModelInfo(name=model) if isinstance(model, str) else model
    lowered = info.name.lower()

    # Curated rules win: the first matching rule fully decides the outcome.
    for rule in _NPU_RULES:
        if not re.search(rule["pattern"], lowered, re.IGNORECASE):
            continue
        level: str = rule["level"]
        reason: str | None = rule["reason"]
        if level == "no":
            return f"⛔ Not recommended for NPU: {reason}"
        if level == "warn":
            return f"⚠ NPU warning: {reason}"
        # level == "ok" — explicitly cleared for NPU use.
        return None

    # No rule matched — fall back to a generic size-based check.
    if hasattr(self._config, "get"):
        cfg_warn_gb: float = float(
            self._config.get("model_selector", {}).get("size_warning_gb", 13.0)
        )
    else:
        cfg_warn_gb = 13.0
    try:
        from src.npu_benchmark import probe_hardware

        hw = probe_hardware()
        if hw.ram_gb > 0:
            # NPU models share system RAM. Usually half of system RAM is a safe threshold
            cfg_warn_gb = max(4.0, hw.ram_gb * 0.5)
        # Speed/compute limits based on TOPS
        if hw.npu_tops > 0:
            if hw.npu_tops < 10:
                cfg_warn_gb = min(cfg_warn_gb, 3.0)
            elif hw.npu_tops < 30:
                cfg_warn_gb = min(cfg_warn_gb, 8.0)
    except Exception:
        # Hardware probing is best-effort; keep the configured threshold.
        pass
    if info.size_gb and info.size_gb > cfg_warn_gb:
        return (
            f"⚠ NPU warning: This model is {info.size_gb:.1f} GB which may "
            f"exceed NPU capabilities (threshold: {cfg_warn_gb:.0f} GB). "
            "Consider a smaller or more aggressively quantized variant."
        )
    return None
model_summary
Return a dict suitable for display in the settings UI.
Keys: name, size_gb, family, quantization,
is_vision, npu_ok, npu_warning.
Source code in src/model_selector.py
def model_summary(self, model: ModelInfo) -> dict[str, Any]:
    """Return a dict suitable for display in the settings UI.

    Keys: ``name``, ``size_gb``, ``family``, ``quantization``,
    ``is_vision``, ``npu_ok``, ``npu_warning``.
    """
    warning = self.npu_warning(model)
    summary: dict[str, Any] = {
        "name": model.name,
        "size_gb": round(model.size_gb, 2),
        "family": model.family,
        "quantization": model.quantization,
        "is_vision": model.is_vision,
        # A model is NPU-ok exactly when no warning applies.
        "npu_ok": warning is None,
        "npu_warning": warning or "",
    }
    return summary
get_npu_suggestions
staticmethod
Return the curated catalog of NPU-recommended models.
Returns entries from :data:~src.npu_model_installer.MODEL_CATALOG
sorted by NPU fit (best first). Vision-capable models are listed
before text-only models within the same fit tier.
Returns
list[ModelCatalogEntry]
Catalog entries rated "excellent" or "good".
Example
::
for entry in ModelSelector.get_npu_suggestions():
print(entry.name, entry.npu_fit_label, "vision=" + str(entry.is_vision))
Source code in src/model_selector.py
@staticmethod
def get_npu_suggestions() -> list[Any]:
    """Return the curated catalog of NPU-recommended models.

    Returns entries from :data:`~src.npu_model_installer.MODEL_CATALOG`
    sorted by NPU fit (best first). Vision-capable models are listed
    before text-only models within the same fit tier.

    Returns
    -------
    list[ModelCatalogEntry]
        Catalog entries rated ``"excellent"`` or ``"good"``.

    Example
    -------
    ::

        for entry in ModelSelector.get_npu_suggestions():
            print(entry.name, entry.npu_fit_label, "vision=" + str(entry.is_vision))
    """
    # Imported lazily (and aliased to avoid shadowing this method's name)
    # so the installer module is only loaded when suggestions are requested.
    from src.npu_model_installer import get_npu_suggestions as _catalog_suggestions

    return _catalog_suggestions()
get_vision_model_suggestions
staticmethod
get_vision_model_suggestions()
Return only vision-capable models from the catalog.
Returns
list[ModelCatalogEntry]
Vision-capable catalog entries sorted by NPU fit.
Example
::
for entry in ModelSelector.get_vision_model_suggestions():
print(entry.name, entry.size_description)
Source code in src/model_selector.py
@staticmethod
def get_vision_model_suggestions() -> list[Any]:
    """Return only vision-capable models from the catalog.

    Returns
    -------
    list[ModelCatalogEntry]
        Vision-capable catalog entries sorted by NPU fit.

    Example
    -------
    ::

        for entry in ModelSelector.get_vision_model_suggestions():
            print(entry.name, entry.size_description)
    """
    # Lazy import keeps the installer module out of the startup path.
    from src.npu_model_installer import get_vision_models as _vision_models

    return _vision_models()
get_default_npu_model_info
staticmethod
get_default_npu_model_info()
Return metadata for the default bundled NPU vision model.
Returns
dict
Same keys as :meth:NPUModelInstaller.model_info.
Source code in src/model_selector.py
@staticmethod
def get_default_npu_model_info() -> dict[str, Any]:
    """Return metadata for the default bundled NPU vision model.

    Returns
    -------
    dict
        Same keys as :meth:`NPUModelInstaller.model_info`.
    """
    # Lazy import: the installer is only needed when this metadata is shown.
    from src.npu_model_installer import NPUModelInstaller

    installer = NPUModelInstaller()
    return installer.model_info()