Skip to content

Commit

Permalink
Add git hash in config (#253)
Browse files Browse the repository at this point in the history
* Add git hash to fingerprint

* Change name to metadata.yaml

* Add a test and locate the installation via `__file__`

* Remove test and print statement

* fix pyright

* Fix bug with non-editable installs

* Fix typo in tool.setuptools.package-data

* Fix CalledProcessError with non-editable installs

---------

Co-authored-by: Nora Belrose <[email protected]>
  • Loading branch information
derpyplops and norabelrose authored Jul 20, 2023
1 parent 9593fe5 commit 1b31702
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 12 deletions.
44 changes: 35 additions & 9 deletions elk/run.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import random
import subprocess
from abc import ABC, abstractmethod
from collections import defaultdict
from dataclasses import dataclass
Expand All @@ -17,6 +18,8 @@
from torch import Tensor
from tqdm import tqdm

import elk

from .debug_logging import save_debug_log
from .extraction import Extract, extract
from .extraction.dataset_name import DatasetDictWithName
Expand All @@ -31,6 +34,19 @@
)


def fetch_git_hash() -> str | None:
try:
return (
subprocess.check_output(
["git", "rev-parse", "HEAD"], cwd=Path(elk.__file__).parent.parent
)
.decode("ascii")
.strip()
)
except (NotADirectoryError, subprocess.CalledProcessError):
return


@dataclass
class Run(ABC, Serializable):
data: Extract
Expand Down Expand Up @@ -86,15 +102,7 @@ def execute(
# properly without this flag enabled.
save(self, self.out_dir / "cfg.yaml", save_dc_types=True)

path = self.out_dir / "fingerprints.yaml"
with open(path, "w") as meta_f:
yaml.dump(
{
ds_name: {split: ds[split]._fingerprint for split in ds.keys()}
for ds_name, ds in self.datasets
},
meta_f,
)
self.write_metadata()

devices = select_usable_devices(self.num_gpus, min_memory=self.min_gpu_mem)
num_devices = len(devices)
Expand Down Expand Up @@ -190,3 +198,21 @@ def apply_to_layers(
df.round(4).to_csv(self.out_dir / f"{name}.csv", index=False)
if self.debug:
save_debug_log(self.datasets, self.out_dir)

def write_metadata(self):
    """Write metadata about the run to `metadata.yaml` in the output directory.

    The file stores, under the `datasets` key, the fingerprint of every split
    of every dataset in `self.datasets`. When `fetch_git_hash` returns a value,
    it is stored under the `git_hash` key; otherwise that key is omitted.
    """
    assert self.out_dir is not None

    # Assemble the whole payload before opening the file, so that a failure
    # while collecting it cannot leave a truncated `metadata.yaml` behind.
    metadata: dict = {
        "datasets": {
            ds_name: {split: ds[split]._fingerprint for split in ds.keys()}
            for ds_name, ds in self.datasets
        }
    }

    git_hash = fetch_git_hash()
    if git_hash is not None:
        metadata["git_hash"] = git_hash

    with open(self.out_dir / "metadata.yaml", "w") as meta_f:
        yaml.dump(metadata, meta_f)
4 changes: 2 additions & 2 deletions tests/test_smoke_elicit.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def test_smoke_elicit_run_tiny_gpt2_ccs(tmp_path: Path):
created_file_names = {file.name for file in files}
expected_files = [
"cfg.yaml",
"fingerprints.yaml",
"metadata.yaml",
"lr_models",
"reporters",
"eval.csv",
Expand Down Expand Up @@ -58,7 +58,7 @@ def test_smoke_elicit_run_tiny_gpt2_eigen(tmp_path: Path):
created_file_names = {file.name for file in files}
expected_files = [
"cfg.yaml",
"fingerprints.yaml",
"metadata.yaml",
"lr_models",
"reporters",
"eval.csv",
Expand Down
2 changes: 1 addition & 1 deletion tests/test_smoke_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

EVAL_EXPECTED_FILES = [
"cfg.yaml",
"fingerprints.yaml",
"metadata.yaml",
"eval.csv",
]

Expand Down

0 comments on commit 1b31702

Please sign in to comment.