diff --git a/internal/adapter/converter/unified_converter.go b/internal/adapter/converter/unified_converter.go index 961ac61..914d493 100644 --- a/internal/adapter/converter/unified_converter.go +++ b/internal/adapter/converter/unified_converter.go @@ -74,9 +74,15 @@ func (c *UnifiedConverter) ConvertToFormat(models []*domain.UnifiedModel, filter func (c *UnifiedConverter) convertModel(model *domain.UnifiedModel) UnifiedModelData { availability := make([]EndpointStatus, 0, len(model.SourceEndpoints)) for _, ep := range model.SourceEndpoints { + // Use GetEffectiveState() rather than ep.State directly: the lifecycle + // unifier updates the typed ModelState field, while ep.State (the legacy + // string) is only set at discovery time and never transitions. Reading + // the effective state ensures health-driven transitions surface in the + // API response. GetEffectiveState() also normalises legacy string values + // ("loaded", "not-loaded", "available") to the typed enum. availability = append(availability, EndpointStatus{ Endpoint: ep.EndpointName, // Use endpoint name instead of URL - State: ep.State, + State: string(ep.GetEffectiveState()), }) } // OLLA-85: [Unification] Models with different digests fail to unify correctly. diff --git a/internal/adapter/registry/profile/parsers.go b/internal/adapter/registry/profile/parsers.go index ea91e53..c2db800 100644 --- a/internal/adapter/registry/profile/parsers.go +++ b/internal/adapter/registry/profile/parsers.go @@ -225,6 +225,15 @@ func (p *openAIParser) Parse(data []byte) ([]*domain.ModelInfo, error) { Name: model.ID, Type: model.Object, // always "model" in openai responses LastSeen: now, + // OpenAI-compatible /v1/models has no size or state fields. For these + // backends (vllm, llama.cpp, Infinity, etc.) the presence of a model + // in the discovery response IS the availability signal — these servers + // only list models that are loaded and ready to serve. Without a + // non-zero Size, MapModelState() falls through to "unknown" and the + // model never appears available in the unified /olla/models response. + // Set a sentinel size of 1 to signal "loaded / available, exact size + // not reported by this backend". + Size: 1, } // openai is stingy with metadata