Skip to content

Commit 8408df8

Browse files
committed
Add support for converting to OCI artifacts
Signed-off-by: Daniel J Walsh <[email protected]>
1 parent ef68632 commit 8408df8

File tree

11 files changed

+1395
-56
lines changed

11 files changed

+1395
-56
lines changed

docs/ramalama-convert.1.md

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,14 +39,15 @@ Image to use when converting to GGUF format (when the `--gguf` option has been
3939
executable and available in the `PATH`. The script is available from the `llama.cpp` GitHub repo. Defaults to the current
4040
`quay.io/ramalama/ramalama-rag` image.
4141

42-
#### **--type**=*raw* | *car*
42+
#### **--type**=*artifact* | *raw* | *car*
4343

44-
type of OCI Model Image to convert.
44+
Convert the MODEL to the specified OCI object.
4545

46-
| Type | Description |
47-
| ---- | ------------------------------------------------------------- |
48-
| car | Includes base image with the model stored in a /models subdir |
49-
| raw | Only the model and a link file model.file to it stored at / |
46+
| Type | Description |
47+
| -------- | ------------------------------------------------------------- |
48+
| artifact | Store AI Models as artifacts |
49+
| car | Traditional OCI image including base image with the model stored in a /models subdir |
50+
| raw | Traditional OCI image including only the model and a link file `model.file` pointing to it, stored at / |
5051

5152
## EXAMPLE
5253

docs/ramalama.conf

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,14 @@
3232
#
3333
#carimage = "registry.access.redhat.com/ubi10-micro:latest"
3434

35+
# Convert the MODEL to the specified OCI object.
36+
# Options: artifact, car, raw
37+
#
38+
# artifact: Store AI Models as artifacts
39+
# car: Traditional OCI image including base image with the model stored in a /models subdir
40+
# raw: Traditional OCI image including only the model and a link file `model.file` pointing to it, stored at /
41+
#convert_type = "raw"
42+
3543
# Run RamaLama in the default container.
3644
#
3745
#container = true

docs/ramalama.conf.5.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,18 @@ Min chunk size to attempt reusing from the cache via KV shifting
8484
Run RamaLama in the default container.
8585
RAMALAMA_IN_CONTAINER environment variable overrides this field.
8686

87+
#convert_type = "raw"
88+
89+
Convert the MODEL to the specified OCI object.
90+
Options: artifact, car, raw
91+
92+
| Type | Description |
93+
| -------- | ------------------------------------------------------------- |
94+
| artifact | Store AI Models as artifacts |
95+
| car | Traditional OCI image including base image with the model stored in a /models subdir |
96+
| raw | Traditional OCI image including only the model and a link file `model.file` pointing to it, stored at / |
97+
98+
8799
**ctx_size**=0
88100

89101
Size of the prompt context (0 = loaded from model)

ramalama/cli.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -719,11 +719,12 @@ def convert_parser(subparsers):
719719
)
720720
parser.add_argument(
721721
"--type",
722-
default="raw",
723-
choices=["car", "raw"],
722+
default=CONFIG.convert_type,
723+
choices=["artifact", "car", "raw"],
724724
help="""\
725725
type of OCI Model Image to push.
726726
727+
Model "artifact" stores the AI Model as an OCI Artifact.
727728
Model "car" includes base image with the model stored in a /models subdir.
728729
Model "raw" contains the model and a link file model.file to it stored at /.""",
729730
)
@@ -762,11 +763,12 @@ def push_parser(subparsers):
762763
add_network_argument(parser)
763764
parser.add_argument(
764765
"--type",
765-
default="raw",
766-
choices=["car", "raw"],
766+
default=CONFIG.convert_type,
767+
choices=["artifact", "car", "raw"],
767768
help="""\
768769
type of OCI Model Image to push.
769770
771+
Model "artifact" stores the AI Model as an OCI Artifact.
770772
Model "car" includes base image with the model stored in a /models subdir.
771773
Model "raw" contains the model and a link file model.file to it stored at /.""",
772774
)
@@ -1424,7 +1426,7 @@ def _rm_model(models, args):
14241426
try:
14251427
m = New(model, args)
14261428
m.remove(args)
1427-
except KeyError as e:
1429+
except (KeyError, subprocess.CalledProcessError) as e:
14281430
for prefix in MODEL_TYPES:
14291431
if model.startswith(prefix + "://"):
14301432
if not args.ignore:

ramalama/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ class BaseConfig:
136136
carimage: str = "registry.access.redhat.com/ubi10-micro:latest"
137137
container: bool = None # type: ignore
138138
ctx_size: int = 0
139+
convert_type: Literal["artifact", "car", "raw"] = "raw"
139140
default_image: str = DEFAULT_IMAGE
140141
default_rag_image: str = DEFAULT_RAG_IMAGE
141142
dryrun: bool = False

ramalama/oci_tools.py

Lines changed: 77 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,66 @@
88
ocilabeltype = "org.containers.type"
99

1010

11-
def engine_supports_manifest_attributes(engine):
11+
def convert_from_human_readable_size(input) -> float:
12+
sizes = [("KB", 1024), ("MB", 1024**2), ("GB", 1024**3), ("TB", 1024**4), ("B", 1)]
13+
for unit, size in sizes:
14+
if input.endswith(unit) or input.endswith(unit.lower()):
15+
return float(input[: -len(unit)]) * size
16+
17+
return float(input)
18+
19+
20+
def list_artifacts(args: EngineArgType):
21+
if args.engine == "docker":
22+
return []
23+
24+
conman_args = [
25+
args.engine,
26+
"artifact",
27+
"ls",
28+
"--format",
29+
(
30+
'{"name":"oci://{{ .Repository }}:{{ .Tag }}",\
31+
"created":"{{ .CreatedAt }}", \
32+
"size":"{{ .Size }}", \
33+
"ID":"{{ .Digest }}"},'
34+
),
35+
]
36+
output = run_cmd(conman_args).stdout.decode("utf-8").strip()
37+
if output == "":
38+
return []
39+
40+
artifacts = json.loads(f"[{output[:-1]}]")
41+
models = []
42+
for artifact in artifacts:
43+
conman_args = [
44+
args.engine,
45+
"artifact",
46+
"inspect",
47+
artifact["ID"],
48+
]
49+
output = run_cmd(conman_args).stdout.decode("utf-8").strip()
50+
51+
if output == "":
52+
continue
53+
inspect = json.loads(output)
54+
if "Manifest" not in inspect:
55+
continue
56+
if "artifactType" not in inspect["Manifest"]:
57+
continue
58+
if inspect["Manifest"]['artifactType'] != annotations.ArtifactTypeModelManifest:
59+
continue
60+
models += [
61+
{
62+
"name": artifact["name"],
63+
"modified": artifact["created"],
64+
"size": convert_from_human_readable_size(artifact["size"]),
65+
}
66+
]
67+
return models
68+
69+
70+
def engine_supports_manifest_attributes(engine) -> bool:
1271
if not engine or engine == "" or engine == "docker":
1372
return False
1473
if engine == "podman" and engine_version(engine) < "5":
@@ -91,26 +150,26 @@ def list_models(args: EngineArgType):
91150
"--format",
92151
formatLine,
93152
]
153+
models = []
94154
output = run_cmd(conman_args, env={"TZ": "UTC"}).stdout.decode("utf-8").strip()
95-
if output == "":
96-
return []
97-
98-
models = json.loads(f"[{output[:-1]}]")
99-
# exclude dangling images having no tag (i.e. <none>:<none>)
100-
models = [model for model in models if model["name"] != "oci://<none>:<none>"]
101-
102-
# Grab the size from the inspect command
103-
if conman == "docker":
104-
# grab the size from the inspect command
105-
for model in models:
106-
conman_args = [conman, "image", "inspect", model["id"], "--format", "{{.Size}}"]
107-
output = run_cmd(conman_args).stdout.decode("utf-8").strip()
108-
# convert the number value from the string output
109-
model["size"] = int(output)
110-
# drop the id from the model
111-
del model["id"]
155+
if output != "":
156+
models += json.loads(f"[{output[:-1]}]")
157+
# exclude dangling images having no tag (i.e. <none>:<none>)
158+
models = [model for model in models if model["name"] != "oci://<none>:<none>"]
159+
160+
# Grab the size from the inspect command
161+
if conman == "docker":
162+
# grab the size from the inspect command
163+
for model in models:
164+
conman_args = [conman, "image", "inspect", model["id"], "--format", "{{.Size}}"]
165+
output = run_cmd(conman_args).stdout.decode("utf-8").strip()
166+
# convert the number value from the string output
167+
model["size"] = int(output)
168+
# drop the id from the model
169+
del model["id"]
112170

113171
models += list_manifests(args)
172+
models += list_artifacts(args)
114173
for model in models:
115174
# Convert to ISO 8601 format
116175
parsed_date = datetime.fromisoformat(

ramalama/transports/base.py

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
import os
23
import platform
34
import random
@@ -145,6 +146,7 @@ def __init__(self, model: str, model_store_path: str):
145146
self._model_type: str
146147
self._model_name, self._model_tag, self._model_organization = self.extract_model_identifiers()
147148
self._model_type = type(self).__name__.lower()
149+
self.artifact = False
148150

149151
self._model_store_path: str = model_store_path
150152
self._model_store: Optional[ModelStore] = None
@@ -200,6 +202,8 @@ def _get_entry_model_path(self, use_container: bool, should_generate: bool, dry_
200202

201203
if self.model_type == 'oci':
202204
if use_container or should_generate:
205+
if getattr(self, "artifact", False):
206+
return os.path.join(MNT_DIR, self.artifact_name())
203207
return os.path.join(MNT_DIR, 'model.file')
204208
else:
205209
return f"oci://{self.model}"
@@ -345,9 +349,10 @@ def exec_model_in_container(self, cmd_args, args):
345349
def setup_mounts(self, args):
346350
if args.dryrun:
347351
return
352+
348353
if self.model_type == 'oci':
349354
if self.engine.use_podman:
350-
mount_cmd = f"--mount=type=image,src={self.model},destination={MNT_DIR},subpath=/models,rw=false"
355+
mount_cmd = self.mount_cmd()
351356
elif self.engine.use_docker:
352357
output_filename = self._get_entry_model_path(args.container, True, args.dryrun)
353358
volume = populate_volume_from_image(self, os.path.basename(output_filename))
@@ -651,40 +656,48 @@ def inspect(
651656
as_json: bool = False,
652657
dryrun: bool = False,
653658
) -> None:
659+
print(self.get_inspect(show_all, show_all_metadata, get_field, dryrun, as_json=as_json))
660+
661+
def get_inspect(
662+
self,
663+
show_all: bool = False,
664+
show_all_metadata: bool = False,
665+
get_field: str = "",
666+
dryrun: bool = False,
667+
as_json: bool = False,
668+
) -> Any:
654669
model_name = self.filename
655670
model_registry = self.type.lower()
656-
model_path = self._get_inspect_model_path(dryrun)
657-
671+
model_path = self._get_entry_model_path(False, False, dryrun)
658672
if GGUFInfoParser.is_model_gguf(model_path):
659673
if not show_all_metadata and get_field == "":
660674
gguf_info: GGUFModelInfo = GGUFInfoParser.parse(model_name, model_registry, model_path)
661-
print(gguf_info.serialize(json=as_json, all=show_all))
662-
return
675+
return gguf_info.serialize(json=as_json, all=show_all)
663676

664677
metadata = GGUFInfoParser.parse_metadata(model_path)
665678
if show_all_metadata:
666-
print(metadata.serialize(json=as_json))
667-
return
679+
return metadata.serialize(json=as_json)
668680
elif get_field != "": # If a specific field is requested, print only that field
669681
field_value = metadata.get(get_field)
670682
if field_value is None:
671683
raise KeyError(f"Field '{get_field}' not found in GGUF model metadata")
672-
print(field_value)
673-
return
684+
return field_value
674685

675686
if SafetensorInfoParser.is_model_safetensor(model_name):
676687
safetensor_info: SafetensorModelInfo = SafetensorInfoParser.parse(model_name, model_registry, model_path)
677-
print(safetensor_info.serialize(json=as_json, all=show_all))
678-
return
688+
return safetensor_info.serialize(json=as_json, all=show_all)
679689

680-
print(ModelInfoBase(model_name, model_registry, model_path).serialize(json=as_json))
690+
return ModelInfoBase(model_name, model_registry, model_path).serialize(json=as_json)
681691

682-
def print_pull_message(self, model_name):
692+
def print_pull_message(self, model_name) -> None:
683693
model_name = trim_model_name(model_name)
684694
# Write messages to stderr
685695
perror(f"Downloading {model_name} ...")
686696
perror(f"Trying to pull {model_name} ...")
687697

698+
def is_artifact(self) -> bool:
699+
return False
700+
688701

689702
def compute_ports(exclude: list[str] | None = None) -> list[int]:
690703
exclude = exclude and set(map(int, exclude)) or set()

0 commit comments

Comments
 (0)