Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions docs/ramalama-convert.1.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,15 @@ Image to use when converting to GGUF format (when the `--gguf` option has been
executable and available in the `PATH`. The script is available from the `llama.cpp` GitHub repo. Defaults to the current
`quay.io/ramalama/ramalama-rag` image.

#### **--type**=*raw* | *car*
#### **--type**=*artifact* | *raw* | *car*

type of OCI Model Image to convert.
Convert the MODEL to the specified OCI Object

| Type | Description |
| ---- | ------------------------------------------------------------- |
| car | Includes base image with the model stored in a /models subdir |
| raw | Only the model and a link file model.file to it stored at / |
| Type | Description |
| -------- | ------------------------------------------------------------- |
| artifact | Store AI Models as artifacts |
| car | Traditional OCI image including base image with the model stored in a /models subdir |
| raw | Traditional OCI image including only the model and a link file `model.file` pointed at it stored at / |

## EXAMPLE

Expand Down
8 changes: 8 additions & 0 deletions docs/ramalama.conf
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,14 @@
#
#carimage = "registry.access.redhat.com/ubi10-micro:latest"

# Convert the MODEL to the specified OCI Object
# Options: artifact, car, raw
#
# artifact: Store AI Models as artifacts
# car: Traditional OCI image including base image with the model stored in a /models subdir
# raw: Traditional OCI image including only the model and a link file `model.file` pointed at it stored at /
#convert_type = "raw"

# Run RamaLama in the default container.
#
#container = true
Expand Down
12 changes: 12 additions & 0 deletions docs/ramalama.conf.5.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,18 @@ Min chunk size to attempt reusing from the cache via KV shifting
Run RamaLama in the default container.
RAMALAMA_IN_CONTAINER environment variable overrides this field.

#convert_type = "raw"

Convert the MODEL to the specified OCI Object
Options: artifact, car, raw

| Type | Description |
| -------- | ------------------------------------------------------------- |
| artifact | Store AI Models as artifacts |
| car | Traditional OCI image including base image with the model stored in a /models subdir |
| raw | Traditional OCI image including only the model and a link file `model.file` pointed at it stored at / |


**ctx_size**=0

Size of the prompt context (0 = loaded from model)
Expand Down
12 changes: 7 additions & 5 deletions ramalama/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -719,11 +719,12 @@ def convert_parser(subparsers):
)
parser.add_argument(
"--type",
default="raw",
choices=["car", "raw"],
default=CONFIG.convert_type,
choices=["artifact", "car", "raw"],
help="""\
type of OCI Model Image to push.

Model "artifact" stores the AI Model as an OCI Artifact.
Model "car" includes base image with the model stored in a /models subdir.
Model "raw" contains the model and a link file model.file to it stored at /.""",
)
Expand Down Expand Up @@ -762,11 +763,12 @@ def push_parser(subparsers):
add_network_argument(parser)
parser.add_argument(
"--type",
default="raw",
choices=["car", "raw"],
default=CONFIG.convert_type,
choices=["artifact", "car", "raw"],
help="""\
type of OCI Model Image to push.

Model "artifact" stores the AI Model as an OCI Artifact.
Model "car" includes base image with the model stored in a /models subdir.
Model "raw" contains the model and a link file model.file to it stored at /.""",
)
Expand Down Expand Up @@ -1424,7 +1426,7 @@ def _rm_model(models, args):
try:
m = New(model, args)
m.remove(args)
except KeyError as e:
except (KeyError, subprocess.CalledProcessError) as e:
for prefix in MODEL_TYPES:
if model.startswith(prefix + "://"):
if not args.ignore:
Expand Down
1 change: 1 addition & 0 deletions ramalama/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ class BaseConfig:
carimage: str = "registry.access.redhat.com/ubi10-micro:latest"
container: bool = None # type: ignore
ctx_size: int = 0
convert_type: Literal["artifact", "car", "raw"] = "raw"
default_image: str = DEFAULT_IMAGE
default_rag_image: str = DEFAULT_RAG_IMAGE
dryrun: bool = False
Expand Down
95 changes: 77 additions & 18 deletions ramalama/oci_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,66 @@
ocilabeltype = "org.containers.type"


def engine_supports_manifest_attributes(engine):
def convert_from_human_readable_size(input) -> float:
    """Convert a human-readable size string to a number of bytes.

    Accepts plain numbers ("100") or numbers with a unit suffix
    ("512B", "1.5KB", "2mb"). Unit suffixes are matched
    case-insensitively and surrounding whitespace is ignored, so
    engine output such as "1.2Gb" parses correctly (the previous
    implementation only accepted all-upper or all-lower suffixes).

    Args:
        input: size string, e.g. "1.5GB".

    Returns:
        The size in bytes as a float.

    Raises:
        ValueError: if the numeric portion cannot be parsed as a float.
    """
    text = input.strip()
    upper = text.upper()
    # Multi-character units must be checked before the bare "B" suffix so
    # that "KB" is not misread as a number ending in "B".
    for unit, factor in (("KB", 1024), ("MB", 1024**2), ("GB", 1024**3), ("TB", 1024**4), ("B", 1)):
        if upper.endswith(unit):
            return float(text[: -len(unit)]) * factor

    # No recognized unit suffix: treat the whole string as a byte count.
    return float(text)


def list_artifacts(args: EngineArgType):
    """List OCI artifacts that hold AI models, via the container engine.

    Runs ``<engine> artifact ls`` to enumerate artifacts, then inspects
    each one and keeps only those whose manifest ``artifactType`` equals
    ``annotations.ArtifactTypeModelManifest``.

    Returns:
        A list of dicts with ``name``, ``modified`` and ``size`` (bytes,
        float) keys. Always empty for docker, which lacks the ``artifact``
        subcommand used here.
    """
    # Docker has no `artifact` subcommand; nothing to list.
    if args.engine == "docker":
        return []

    conman_args = [
        args.engine,
        "artifact",
        "ls",
        "--format",
        # Emits one JSON object per artifact, each followed by a comma;
        # the trailing comma is stripped before parsing below.
        (
            '{"name":"oci://{{ .Repository }}:{{ .Tag }}",\
"created":"{{ .CreatedAt }}", \
"size":"{{ .Size }}", \
"ID":"{{ .Digest }}"},'
        ),
    ]
    output = run_cmd(conman_args).stdout.decode("utf-8").strip()
    if output == "":
        return []

    # output[:-1] drops the trailing comma so the result is valid JSON.
    artifacts = json.loads(f"[{output[:-1]}]")
    models = []
    for artifact in artifacts:
        conman_args = [
            args.engine,
            "artifact",
            "inspect",
            artifact["ID"],
        ]
        output = run_cmd(conman_args).stdout.decode("utf-8").strip()

        if output == "":
            continue
        inspect = json.loads(output)
        # Keep only artifacts explicitly marked as AI model manifests;
        # anything without the expected manifest metadata is skipped.
        if "Manifest" not in inspect:
            continue
        if "artifactType" not in inspect["Manifest"]:
            continue
        if inspect["Manifest"]['artifactType'] != annotations.ArtifactTypeModelManifest:
            continue
        models += [
            {
                "name": artifact["name"],
                "modified": artifact["created"],
                # The engine reports a human-readable size ("1.2GB");
                # normalize it to a byte count.
                "size": convert_from_human_readable_size(artifact["size"]),
            }
        ]
    return models


def engine_supports_manifest_attributes(engine) -> bool:
if not engine or engine == "" or engine == "docker":
return False
if engine == "podman" and engine_version(engine) < "5":
Expand Down Expand Up @@ -91,26 +150,26 @@ def list_models(args: EngineArgType):
"--format",
formatLine,
]
models = []
output = run_cmd(conman_args, env={"TZ": "UTC"}).stdout.decode("utf-8").strip()
if output == "":
return []

models = json.loads(f"[{output[:-1]}]")
# exclude dangling images having no tag (i.e. <none>:<none>)
models = [model for model in models if model["name"] != "oci://<none>:<none>"]

# Grab the size from the inspect command
if conman == "docker":
# grab the size from the inspect command
for model in models:
conman_args = [conman, "image", "inspect", model["id"], "--format", "{{.Size}}"]
output = run_cmd(conman_args).stdout.decode("utf-8").strip()
# convert the number value from the string output
model["size"] = int(output)
# drop the id from the model
del model["id"]
if output != "":
models += json.loads(f"[{output[:-1]}]")
# exclude dangling images having no tag (i.e. <none>:<none>)
models = [model for model in models if model["name"] != "oci://<none>:<none>"]

# Grab the size from the inspect command
if conman == "docker":
# grab the size from the inspect command
for model in models:
conman_args = [conman, "image", "inspect", model["id"], "--format", "{{.Size}}"]
output = run_cmd(conman_args).stdout.decode("utf-8").strip()
# convert the number value from the string output
model["size"] = int(output)
# drop the id from the model
del model["id"]

models += list_manifests(args)
models += list_artifacts(args)
Comment on lines 113 to +172
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: The models list is extended with both manifests and artifacts, which may result in duplicate entries if the same model exists in both forms.

Deduplicate models by name or ID to prevent listing the same model multiple times.

Suggested implementation:

    # Combine manifests and artifacts, then deduplicate by 'name'
    manifest_models = list_manifests(args)
    artifact_models = list_artifacts(args)
    combined_models = manifest_models + artifact_models

    # Deduplicate by 'name'
    seen_names = set()
    deduped_models = []
    for model in combined_models:
        model_name = model.get("name")
        if model_name and model_name not in seen_names:
            deduped_models.append(model)
            seen_names.add(model_name)

    models += deduped_models

    for model in models:
        # Convert to ISO 8601 format
  • If your models use a different unique key (e.g., "id" instead of "name"), replace "name" with the appropriate key in the deduplication logic.
  • If models is not empty before this block, you may want to deduplicate the entire list (including existing entries).

for model in models:
# Convert to ISO 8601 format
parsed_date = datetime.fromisoformat(
Expand Down
36 changes: 24 additions & 12 deletions ramalama/transports/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ def __init__(self, model: str, model_store_path: str):
self._model_type: str
self._model_name, self._model_tag, self._model_organization = self.extract_model_identifiers()
self._model_type = type(self).__name__.lower()
self.artifact = False

self._model_store_path: str = model_store_path
self._model_store: Optional[ModelStore] = None
Expand Down Expand Up @@ -200,6 +201,8 @@ def _get_entry_model_path(self, use_container: bool, should_generate: bool, dry_

if self.model_type == 'oci':
if use_container or should_generate:
if getattr(self, "artifact", False):
return os.path.join(MNT_DIR, self.artifact_name())
return os.path.join(MNT_DIR, 'model.file')
else:
return f"oci://{self.model}"
Expand Down Expand Up @@ -345,9 +348,10 @@ def exec_model_in_container(self, cmd_args, args):
def setup_mounts(self, args):
if args.dryrun:
return

if self.model_type == 'oci':
if self.engine.use_podman:
mount_cmd = f"--mount=type=image,src={self.model},destination={MNT_DIR},subpath=/models,rw=false"
mount_cmd = self.mount_cmd()
elif self.engine.use_docker:
output_filename = self._get_entry_model_path(args.container, True, args.dryrun)
volume = populate_volume_from_image(self, os.path.basename(output_filename))
Expand Down Expand Up @@ -651,40 +655,48 @@ def inspect(
as_json: bool = False,
dryrun: bool = False,
) -> None:
print(self.get_inspect(show_all, show_all_metadata, get_field, dryrun, as_json))

def get_inspect(
self,
show_all: bool = False,
show_all_metadata: bool = False,
get_field: str = "",
dryrun: bool = False,
as_json: bool = False,
) -> Any:
model_name = self.filename
model_registry = self.type.lower()
model_path = self._get_inspect_model_path(dryrun)

if GGUFInfoParser.is_model_gguf(model_path):
if not show_all_metadata and get_field == "":
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue (code-quality): We've found these issues:

gguf_info: GGUFModelInfo = GGUFInfoParser.parse(model_name, model_registry, model_path)
print(gguf_info.serialize(json=as_json, all=show_all))
return
return gguf_info.serialize(json=as_json, all=show_all)

metadata = GGUFInfoParser.parse_metadata(model_path)
if show_all_metadata:
print(metadata.serialize(json=as_json))
return
return metadata.serialize(json=as_json)
elif get_field != "": # If a specific field is requested, print only that field
field_value = metadata.get(get_field)
if field_value is None:
raise KeyError(f"Field '{get_field}' not found in GGUF model metadata")
print(field_value)
return
return field_value

if SafetensorInfoParser.is_model_safetensor(model_name):
safetensor_info: SafetensorModelInfo = SafetensorInfoParser.parse(model_name, model_registry, model_path)
print(safetensor_info.serialize(json=as_json, all=show_all))
return
return safetensor_info.serialize(json=as_json, all=show_all)

print(ModelInfoBase(model_name, model_registry, model_path).serialize(json=as_json))
return ModelInfoBase(model_name, model_registry, model_path).serialize(json=as_json)

def print_pull_message(self, model_name):
def print_pull_message(self, model_name) -> None:
    """Announce on stderr that *model_name* is being downloaded/pulled."""
    display_name = trim_model_name(model_name)
    # Progress messages go to stderr so stdout stays clean for output.
    for message in (
        f"Downloading {display_name} ...",
        f"Trying to pull {display_name} ...",
    ):
        perror(message)

def is_artifact(self) -> bool:
    """Return True when this model is stored as an OCI artifact.

    Base implementation always returns False; transports that support
    OCI artifacts presumably override this — confirm in subclasses.
    """
    return False


def compute_ports(exclude: list[str] | None = None) -> list[int]:
exclude = exclude and set(map(int, exclude)) or set()
Expand Down
Loading
Loading