diff --git a/automatminer_dev/config.py b/automatminer_dev/config.py
index 2ce6044b..945b8c43 100644
--- a/automatminer_dev/config.py
+++ b/automatminer_dev/config.py
@@ -29,6 +29,7 @@
"target": "log10(K_VRH)",
"problem_type": AMM_REG_NAME,
"clf_pos_label": None,
+ "unit": None,
}
LOG_GVRH = {
@@ -37,6 +38,7 @@
"target": "log10(G_VRH)",
"problem_type": AMM_REG_NAME,
"clf_pos_label": None,
+ "unit": None,
}
DIELECTRIC = {
@@ -45,6 +47,7 @@
"target": "n",
"problem_type": AMM_REG_NAME,
"clf_pos_label": None,
+ "unit": None,
}
JDFT2D = {
@@ -53,6 +56,7 @@
"target": "exfoliation_en",
"problem_type": AMM_REG_NAME,
"clf_pos_label": None,
+ "unit": "meV/atom"
}
MP_GAP = {
@@ -61,6 +65,7 @@
"target": "gap pbe",
"problem_type": AMM_REG_NAME,
"clf_pos_label": None,
+ "unit": "eV"
}
MP_IS_METAL = {
@@ -69,6 +74,7 @@
"target": "is_metal",
"problem_type": AMM_CLF_NAME,
"clf_pos_label": True,
+ "unit": None
}
MP_E_FORM = {
@@ -77,6 +83,7 @@
"target": "e_form",
"problem_type": AMM_REG_NAME,
"clf_pos_label": None,
+ "unit": "eV/atom"
}
PEROVSKITES = {
@@ -85,6 +92,7 @@
"target": "e_form",
"problem_type": AMM_REG_NAME,
"clf_pos_label": None,
+ "unit": "eV"
}
GLASS = {
@@ -93,6 +101,7 @@
"target": "gfa",
"problem_type": AMM_CLF_NAME,
"clf_pos_label": True,
+ "unit": None
}
EXPT_IS_METAL = {
@@ -101,6 +110,7 @@
"target": "is_metal",
"problem_type": AMM_CLF_NAME,
"clf_pos_label": True,
+ "unit": None
}
EXPT_GAP = {
@@ -109,6 +119,7 @@
"target": "gap expt",
"problem_type": AMM_REG_NAME,
"clf_pos_label": None,
+ "unit": "eV"
}
PHONONS = {
@@ -117,6 +128,7 @@
"target": "last phdos peak",
"problem_type": AMM_REG_NAME,
"clf_pos_label": None,
+ "unit": "cm^-1"
}
STEELS = {
@@ -125,6 +137,7 @@
"target": "yield strength",
"problem_type": AMM_REG_NAME,
"clf_pos_label": None,
+ "unit": "MPa"
}
BENCHMARK_DEBUG_SET = [JDFT2D, PHONONS, EXPT_IS_METAL, STEELS]
@@ -143,3 +156,17 @@
STEELS,
PHONONS,
]
+
+HAS_STRUCTURE = [
+ LOG_KVRH,
+ LOG_GVRH,
+ DIELECTRIC,
+ JDFT2D,
+ MP_GAP,
+ MP_IS_METAL,
+ MP_E_FORM,
+ PEROVSKITES,
+ PHONONS
+]
+
+BENCHMARK_DICT = {ds["name"]: ds for ds in BENCHMARK_FULL_SET}
\ No newline at end of file
diff --git a/automatminer_dev/matbench/dataset_creation/__init__.py b/automatminer_dev/matbench/dataset_creation/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/automatminer_dev/matbench/castelli.py b/automatminer_dev/matbench/dataset_creation/castelli.py
similarity index 100%
rename from automatminer_dev/matbench/castelli.py
rename to automatminer_dev/matbench/dataset_creation/castelli.py
diff --git a/automatminer_dev/matbench/dielectric.py b/automatminer_dev/matbench/dataset_creation/dielectric.py
similarity index 100%
rename from automatminer_dev/matbench/dielectric.py
rename to automatminer_dev/matbench/dataset_creation/dielectric.py
diff --git a/automatminer_dev/matbench/expt_gap.py b/automatminer_dev/matbench/dataset_creation/expt_gap.py
similarity index 91%
rename from automatminer_dev/matbench/expt_gap.py
rename to automatminer_dev/matbench/dataset_creation/expt_gap.py
index 9c79b767..c2907651 100644
--- a/automatminer_dev/matbench/expt_gap.py
+++ b/automatminer_dev/matbench/dataset_creation/expt_gap.py
@@ -73,6 +73,10 @@
df_new = df_new.reset_index(drop=True)
+# NOTE: the entry "GaAs0.1P0.9G1128" must be corrected by hand to its true
+# composition, "GaAs0.1P0.9". It comes from "Solid Solutions in Semiconducting
+# Systems. Handbook", M., Nauka 1978, 200 p., gathered from http://bg.imet-db.ru
+
store_dataframe_as_json(df_new, "expt_gap.json.gz", compression="gz")
print(df_new)
diff --git a/automatminer_dev/matbench/expt_is_metal.py b/automatminer_dev/matbench/dataset_creation/expt_is_metal.py
similarity index 92%
rename from automatminer_dev/matbench/expt_is_metal.py
rename to automatminer_dev/matbench/dataset_creation/expt_is_metal.py
index 5cafe935..a33c802c 100644
--- a/automatminer_dev/matbench/expt_is_metal.py
+++ b/automatminer_dev/matbench/dataset_creation/expt_is_metal.py
@@ -74,6 +74,10 @@
df_new["is_metal"] = df_new["is_metal"] == 1
+# NOTE: the entry "GaAs0.1P0.9G1128" must be corrected by hand to its true
+# composition, "GaAs0.1P0.9". It comes from "Solid Solutions in Semiconducting
+# Systems. Handbook", M., Nauka 1978, 200 p., gathered from http://bg.imet-db.ru
+
store_dataframe_as_json(df_new, "expt_is_metal.json.gz", compression="gz")
print(df_new)
diff --git a/automatminer_dev/matbench/glass.py b/automatminer_dev/matbench/dataset_creation/glass.py
similarity index 100%
rename from automatminer_dev/matbench/glass.py
rename to automatminer_dev/matbench/dataset_creation/glass.py
diff --git a/automatminer_dev/matbench/jdft2d.py b/automatminer_dev/matbench/dataset_creation/jdft2d.py
similarity index 100%
rename from automatminer_dev/matbench/jdft2d.py
rename to automatminer_dev/matbench/dataset_creation/jdft2d.py
diff --git a/automatminer_dev/matbench/mp_eform.py b/automatminer_dev/matbench/dataset_creation/mp_eform.py
similarity index 100%
rename from automatminer_dev/matbench/mp_eform.py
rename to automatminer_dev/matbench/dataset_creation/mp_eform.py
diff --git a/automatminer_dev/matbench/mp_elasticity.py b/automatminer_dev/matbench/dataset_creation/mp_elasticity.py
similarity index 100%
rename from automatminer_dev/matbench/mp_elasticity.py
rename to automatminer_dev/matbench/dataset_creation/mp_elasticity.py
diff --git a/automatminer_dev/matbench/mp_gaps.py b/automatminer_dev/matbench/dataset_creation/mp_gaps.py
similarity index 100%
rename from automatminer_dev/matbench/mp_gaps.py
rename to automatminer_dev/matbench/dataset_creation/mp_gaps.py
diff --git a/automatminer_dev/matbench/phonons.py b/automatminer_dev/matbench/dataset_creation/phonons.py
similarity index 100%
rename from automatminer_dev/matbench/phonons.py
rename to automatminer_dev/matbench/dataset_creation/phonons.py
diff --git a/automatminer_dev/matbench/steels.py b/automatminer_dev/matbench/dataset_creation/steels.py
similarity index 91%
rename from automatminer_dev/matbench/steels.py
rename to automatminer_dev/matbench/dataset_creation/steels.py
index bc0233cb..71a10114 100644
--- a/automatminer_dev/matbench/steels.py
+++ b/automatminer_dev/matbench/dataset_creation/steels.py
@@ -8,6 +8,8 @@
from matminer.datasets.dataset_retrieval import load_dataset
+
+# Note the units are in MPa, NOT GPa
if __name__ == "__main__":
df = load_dataset("steel_strength")
df = df[["formula", "yield strength"]]
diff --git a/automatminer_dev/matbench/docs/__init__.py b/automatminer_dev/matbench/docs/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/automatminer_dev/matbench/get_info.py b/automatminer_dev/matbench/docs/formatting_matbench_table.py
similarity index 87%
rename from automatminer_dev/matbench/get_info.py
rename to automatminer_dev/matbench/docs/formatting_matbench_table.py
index 8a14ecfe..bd1ee747 100644
--- a/automatminer_dev/matbench/get_info.py
+++ b/automatminer_dev/matbench/docs/formatting_matbench_table.py
@@ -1,4 +1,12 @@
from matminer.datasets.dataset_retrieval import load_dataset, get_available_datasets, get_all_dataset_info
+
+
+'''
+
+Helper script to format the matbench documentation page.
+'''
+
+
datasets = get_available_datasets(print_format=None)
for dataset in datasets:
diff --git a/automatminer_dev/matbench/mpcontribs/__init__.py b/automatminer_dev/matbench/mpcontribs/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/automatminer_dev/matbench/mpcontribs/upload.py b/automatminer_dev/matbench/mpcontribs/upload.py
new file mode 100644
index 00000000..bca132a7
--- /dev/null
+++ b/automatminer_dev/matbench/mpcontribs/upload.py
@@ -0,0 +1,219 @@
+import wget, json, os, math
+from string import capwords
+from pybtex.database import parse_string
+import pybtex.errors
+from mpcontribs.client import Client
+from pymatgen import MPRester, Structure
+import tqdm
+import pprint
+
+# from matminer.datasets.dataset_retrieval import (
+# get_all_dataset_info,
+# get_available_datasets,
+# load_dataset,
+# )
+
+from matminer.datasets import load_dataset
+
+from automatminer_dev.config import DIELECTRIC, JDFT2D, PEROVSKITES, STEELS, BENCHMARK_FULL_SET, BENCHMARK_DICT, HAS_STRUCTURE
+
+
+pybtex.errors.set_strict_mode(False)  # NOTE(review): presumably makes pybtex warn instead of raise on bad bibtex - confirm
+api_key = os.environ["MPCONTRIBS_API_KEY"]  # raises KeyError when the variable is not set
+client = Client(api_key, host='ml-api.materialsproject.cloud')
+mprester = MPRester()  # not referenced by the active (uncommented) code in this script
+
+
+# client.get_project("matbench_steels").pretty()
+
+
+# Download matminer's dataset metadata into the working directory if it is not there yet.
+fn = 'dataset_metadata.json'
+if not os.path.exists(fn):
+    wget.download(f'https://raw.githubusercontent.com/hackingmaterials/matminer/master/matminer/datasets/{fn}')
+with open(fn, 'r') as f:  # context manager closes the handle; json.load(open(...)) left it open
+    metadata = json.load(f)
+metadata = {k: d for k, d in metadata.items() if "matbench" in k}  # keep only matbench entries
+
+# Creating new projects
+#######################
+# todo: might not have access to add new projects
+# for name, info in metadata.items():
+#
+# print(f"Uploading {name}")
+#
+# columns = {}
+# for col, text in info['columns'].items():
+# k = col.replace('_', '|').replace('-', '|').replace('(', ' ').replace(
+# ')', '')
+# columns[k] = text
+#
+# project = {
+# 'is_public': True,
+# 'owner': 'ardunn@lbl.gov',
+# "name": name,
+# 'title': name, # TODO update and set long_title
+# 'authors': 'A. Dunn, A. Jain',
+# 'description': info['description'],
+# 'other': {
+# 'columns': columns,
+# 'entries': info['num_entries']
+# },
+# 'references': []
+# }
+#
+# for ref in info['bibtex_refs']:
+#
+# if name == "matbench_phonons":
+# ref = ref.replace(
+# "petretto_dwaraknath_miranda_winston_giantomassi_rignanese_van setten_gonze_persson_hautier_2018",
+# "petretto2018")
+#
+# bib = parse_string(ref, 'bibtex')
+# for key, entry in bib.entries.items():
+# key_is_doi = key.startswith('doi:')
+# url = 'https://doi.org/' + key.split(':', 1)[
+# -1] if key_is_doi else entry.fields.get('url')
+# k = 'Zhuo2018' if key_is_doi else capwords(key.replace('_', ''))
+# if k.startswith('C2'):
+# k = 'Castelli2012'
+# elif k.startswith('Landolt'):
+# k = 'LB1997'
+# elif k == 'Citrine':
+# url = 'https://www.citrination.com'
+#
+# if len(k) > 8:
+# k = k[:4] + k[-4:]
+# project['references'].append({"label": k, "url": url})
+#
+# try:
+# print(client.projects.create_entry(project=project).result())
+# except Exception as ex:
+# print(
+# ex) # TODO should use get_entry to check existence -> use update_entry if project exists
+
+
+
+
+
+# Map of canonical yet non-mpcontribs-compatible target names to compatible (unicode, no punctuation) target names
+target_map = {
+ "yield strength": "σᵧ",
+ "log10(K_VRH)": "log₁₀Kᵛʳʰ",
+ "log10(G_VRH)": "log₁₀Gᵛʳʰ",
+ "n": "𝑛",
+ "exfoliation_en": "Eˣ",
+ "gap pbe": "Eᵍ",
+ "is_metal": "metallic",
+ "e_form": "Eᶠ",
+ "gfa": "glass",
+ "gap expt": "Eᵍ",  # same symbol as "gap pbe"; the two targets belong to different datasets
+ "last phdos peak": "ωᵐᵃˣ",
+}
+
+
+# # Getting project-level metadata in order
+# #########################################
+#
+# # Add warning to mpcontribs since the results will be stored out of order.
+# # Also, fix columns for new mpcontribs deployment
+# for name, info in metadata.items():
+# mb_shortname = name.replace("matbench_", "")
+#
+# description = info["description"] + f" If you are viewing this on MPContribs-ML interactively, please ensure the order of the identifiers is sequential (mb-{mb_shortname}-0001, mb-{mb_shortname}-0002, etc.) before benchmarking."
+# if "For benchmarking" not in description:
+# print(name, description)
+#
+# has_structure = mb_shortname in [ds["name"] for ds in HAS_STRUCTURE]
+# primitive_key = "structure" if has_structure else "composition"
+# target = BENCHMARK_DICT[mb_shortname]["target"]
+#
+# print(client.projects.update_entry(
+# pk=name,
+# project={
+# "description": description,
+# 'other.columns': {
+# target_map[target]: metadata[name]["columns"][target],
+# primitive_key: metadata[name]["columns"][primitive_key]
+# }
+# }).result())
+
+
+
+
+# Entering all contributions to projects
+########################################
+
+
+# steels.........X
+# log_kvrh.......
+# log_gvrh.......
+# dielectric.....
+# jdft2d.........X
+# expt_gap.......X
+# expt_is_metal..X
+# phonons........
+# mp_is_metal....
+# mp_gap.........
+# glass..........X
+# mp_e_form......
+# perovskites....
+
+
+# Upload each listed matbench dataset to MPContribs, one contribution per row.
+for ds in ["dielectric", "phonons", "mp_gap", "mp_is_metal", "perovskites", "mp_e_form"]:
+
+    ds_config = BENCHMARK_DICT[ds]
+
+    name = "matbench_" + ds_config["name"]
+    print(f"Loading {name}")
+    df = load_dataset(name)
+    target = ds_config["target"]
+    # Unit suffix appended to each target value; empty when the config has no unit.
+    unit = f" {ds_config['unit']}" if ds_config["unit"] else ""
+
+    # print(f"Updating 'other' column entries of {name} with unicode.")
+    # print(client.projects.update_entry(pk=name, project={
+    #     'other.columns': {
+    #         target_map[target]: metadata[name]["columns"][target],
+    #         "structure": metadata[name]["columns"]["structure"]
+    #         # "composition": metadata[name]["columns"]["composition"]
+    #     }
+    # }).result())
+
+    # print(f"Deleting contributions of {name}")
+    # client.delete_contributions(name)
+
+    print(f"Assembling and uploading contributions for {name}")
+    # Scratch file for the CIF round trip below (machine-specific path).
+    structure_filename = "/Users/ardunn/Downloads/outfile.cif"
+    contributions = []
+
+    # Zero-pad identifiers to the digit count of the row count. len(str(n)) is
+    # exact, whereas floor(log(n, 10)) + 1 is one short for n == 1000.
+    id_n_zeros = len(str(df.shape[0]))
+    for i, (_, entry) in tqdm.tqdm(enumerate(df.iterrows()), total=df.shape[0]):
+        contrib = {'project': name, 'is_public': True}
+
+        if "structure" in entry.index:
+            # Write to CIF and read back; presumably normalizes the structure - confirm.
+            s = entry.loc["structure"]
+            s.to("cif", structure_filename)
+            s = Structure.from_file(structure_filename)
+            c = s.composition.get_integer_formula_and_factor()[0]
+            contrib["structures"] = [s]
+
+        else:
+            c = entry["composition"]
+
+        # Identifiers are 1-based and zero-padded to id_n_zeros digits.
+        id_number = f"{i+1:0{id_n_zeros}d}"
+        identifier = f"mb-{ds_config['name']}-{id_number}"
+        contrib["identifier"] = identifier
+
+        contrib["data"] = {target_map[target]: f"{entry.loc[target]}{unit}"}
+        contrib["formula"] = c
+        contributions.append(contrib)
+
+    # Submit once per dataset, after all of its contributions are assembled.
+    client.submit_contributions(contributions, per_page=10)
\ No newline at end of file
diff --git a/docs/_sources/datasets.rst.txt b/docs/_sources/datasets.rst.txt
index 18262318..6777e5b1 100644
--- a/docs/_sources/datasets.rst.txt
+++ b/docs/_sources/datasets.rst.txt
@@ -195,7 +195,7 @@ procedures, etc.) on a dataset with :code:`matminer.datasets.get_all_dataset_inf
Description: Matbench v0.1 dataset for predicting steel yield strengths from chemical composition alone. Retrieved from Citrine informatics. Deduplicated.
Columns:
composition: Chemical formula.
- yield strength: Target variable. Experimentally measured steel yield strengths, in GPa.
+ yield strength: Target variable. Experimentally measured steel yield strengths, in MPa.
Num Entries: 312
Reference: https://citrination.com/datasets/153092/
Bibtex citations: ['@misc{Citrine Informatics,\ntitle = {Mechanical properties of some steels},\nhowpublished = {\\url{https://citrination.com/datasets/153092/},\n}']
diff --git a/docs/_sources/index.rst.txt b/docs/_sources/index.rst.txt
index e0cb7bf4..99c9eb2c 100644
--- a/docs/_sources/index.rst.txt
+++ b/docs/_sources/index.rst.txt
@@ -150,7 +150,7 @@ Want to see something added or changed? Some ways to get involved are:
- Contribute code! You can do this by forking
`Automatminer on Github Getting dataset infoContributing / Contact / SupportAutomatminer on Github
and submitting a pull request.
Post to our support forum. Don’t be shy, we look forward to feedback!
Post to our support forum. Don’t be shy, we look forward to feedback!
See our contribution guidelines
for more inspect. For a list of contributors, see our
diff --git a/docs/source/datasets.rst b/docs/source/datasets.rst
index 18262318..6777e5b1 100644
--- a/docs/source/datasets.rst
+++ b/docs/source/datasets.rst
@@ -195,7 +195,7 @@ procedures, etc.) on a dataset with :code:`matminer.datasets.get_all_dataset_inf
Description: Matbench v0.1 dataset for predicting steel yield strengths from chemical composition alone. Retrieved from Citrine informatics. Deduplicated.
Columns:
composition: Chemical formula.
- yield strength: Target variable. Experimentally measured steel yield strengths, in GPa.
+ yield strength: Target variable. Experimentally measured steel yield strengths, in MPa.
Num Entries: 312
Reference: https://citrination.com/datasets/153092/
Bibtex citations: ['@misc{Citrine Informatics,\ntitle = {Mechanical properties of some steels},\nhowpublished = {\\url{https://citrination.com/datasets/153092/},\n}']
diff --git a/docs/source/index.rst b/docs/source/index.rst
index e0cb7bf4..99c9eb2c 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -150,7 +150,7 @@ Want to see something added or changed? Some ways to get involved are:
- Contribute code! You can do this by forking
`Automatminer on Github