diff --git a/automatminer_dev/config.py b/automatminer_dev/config.py index 2ce6044b..945b8c43 100644 --- a/automatminer_dev/config.py +++ b/automatminer_dev/config.py @@ -29,6 +29,7 @@ "target": "log10(K_VRH)", "problem_type": AMM_REG_NAME, "clf_pos_label": None, + "unit": None, } LOG_GVRH = { @@ -37,6 +38,7 @@ "target": "log10(G_VRH)", "problem_type": AMM_REG_NAME, "clf_pos_label": None, + "unit": None, } DIELECTRIC = { @@ -45,6 +47,7 @@ "target": "n", "problem_type": AMM_REG_NAME, "clf_pos_label": None, + "unit": None, } JDFT2D = { @@ -53,6 +56,7 @@ "target": "exfoliation_en", "problem_type": AMM_REG_NAME, "clf_pos_label": None, + "unit": "meV/atom" } MP_GAP = { @@ -61,6 +65,7 @@ "target": "gap pbe", "problem_type": AMM_REG_NAME, "clf_pos_label": None, + "unit": "eV" } MP_IS_METAL = { @@ -69,6 +74,7 @@ "target": "is_metal", "problem_type": AMM_CLF_NAME, "clf_pos_label": True, + "unit": None } MP_E_FORM = { @@ -77,6 +83,7 @@ "target": "e_form", "problem_type": AMM_REG_NAME, "clf_pos_label": None, + "unit": "eV/atom" } PEROVSKITES = { @@ -85,6 +92,7 @@ "target": "e_form", "problem_type": AMM_REG_NAME, "clf_pos_label": None, + "unit": "eV" } GLASS = { @@ -93,6 +101,7 @@ "target": "gfa", "problem_type": AMM_CLF_NAME, "clf_pos_label": True, + "unit": None } EXPT_IS_METAL = { @@ -101,6 +110,7 @@ "target": "is_metal", "problem_type": AMM_CLF_NAME, "clf_pos_label": True, + "unit": None } EXPT_GAP = { @@ -109,6 +119,7 @@ "target": "gap expt", "problem_type": AMM_REG_NAME, "clf_pos_label": None, + "unit": "eV" } PHONONS = { @@ -117,6 +128,7 @@ "target": "last phdos peak", "problem_type": AMM_REG_NAME, "clf_pos_label": None, + "unit": "cm^-1" } STEELS = { @@ -125,6 +137,7 @@ "target": "yield strength", "problem_type": AMM_REG_NAME, "clf_pos_label": None, + "unit": "MPa" } BENCHMARK_DEBUG_SET = [JDFT2D, PHONONS, EXPT_IS_METAL, STEELS] @@ -143,3 +156,17 @@ STEELS, PHONONS, ] + +HAS_STRUCTURE = [ + LOG_KVRH, + LOG_GVRH, + DIELECTRIC, + JDFT2D, + MP_GAP, + MP_IS_METAL, + 
MP_E_FORM, + PEROVSKITES, + PHONONS +] + +BENCHMARK_DICT = {ds["name"]: ds for ds in BENCHMARK_FULL_SET} \ No newline at end of file diff --git a/automatminer_dev/matbench/dataset_creation/__init__.py b/automatminer_dev/matbench/dataset_creation/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/automatminer_dev/matbench/castelli.py b/automatminer_dev/matbench/dataset_creation/castelli.py similarity index 100% rename from automatminer_dev/matbench/castelli.py rename to automatminer_dev/matbench/dataset_creation/castelli.py diff --git a/automatminer_dev/matbench/dielectric.py b/automatminer_dev/matbench/dataset_creation/dielectric.py similarity index 100% rename from automatminer_dev/matbench/dielectric.py rename to automatminer_dev/matbench/dataset_creation/dielectric.py diff --git a/automatminer_dev/matbench/expt_gap.py b/automatminer_dev/matbench/dataset_creation/expt_gap.py similarity index 91% rename from automatminer_dev/matbench/expt_gap.py rename to automatminer_dev/matbench/dataset_creation/expt_gap.py index 9c79b767..c2907651 100644 --- a/automatminer_dev/matbench/expt_gap.py +++ b/automatminer_dev/matbench/dataset_creation/expt_gap.py @@ -73,6 +73,10 @@ df_new = df_new.reset_index(drop=True) +# you need to manually change GaAs0.1P0.9G1128 to its correct composition, which +# is GaAs0.1P0.9 from Solid Solutions in Semiconducting Systems.Handbook, +# M., Nauka 1978, 200 p. 
and was gathered from http://bg.imet-db.ru + store_dataframe_as_json(df_new, "expt_gap.json.gz", compression="gz") print(df_new) diff --git a/automatminer_dev/matbench/expt_is_metal.py b/automatminer_dev/matbench/dataset_creation/expt_is_metal.py similarity index 92% rename from automatminer_dev/matbench/expt_is_metal.py rename to automatminer_dev/matbench/dataset_creation/expt_is_metal.py index 5cafe935..a33c802c 100644 --- a/automatminer_dev/matbench/expt_is_metal.py +++ b/automatminer_dev/matbench/dataset_creation/expt_is_metal.py @@ -74,6 +74,10 @@ df_new["is_metal"] = df_new["is_metal"] == 1 +# you need to manually change GaAs0.1P0.9G1128 to its correct composition, which +# is GaAs0.1P0.9 from Solid Solutions in Semiconducting Systems.Handbook, +# M., Nauka 1978, 200 p. and was gathered from http://bg.imet-db.ru + store_dataframe_as_json(df_new, "expt_is_metal.json.gz", compression="gz") print(df_new) diff --git a/automatminer_dev/matbench/glass.py b/automatminer_dev/matbench/dataset_creation/glass.py similarity index 100% rename from automatminer_dev/matbench/glass.py rename to automatminer_dev/matbench/dataset_creation/glass.py diff --git a/automatminer_dev/matbench/jdft2d.py b/automatminer_dev/matbench/dataset_creation/jdft2d.py similarity index 100% rename from automatminer_dev/matbench/jdft2d.py rename to automatminer_dev/matbench/dataset_creation/jdft2d.py diff --git a/automatminer_dev/matbench/mp_eform.py b/automatminer_dev/matbench/dataset_creation/mp_eform.py similarity index 100% rename from automatminer_dev/matbench/mp_eform.py rename to automatminer_dev/matbench/dataset_creation/mp_eform.py diff --git a/automatminer_dev/matbench/mp_elasticity.py b/automatminer_dev/matbench/dataset_creation/mp_elasticity.py similarity index 100% rename from automatminer_dev/matbench/mp_elasticity.py rename to automatminer_dev/matbench/dataset_creation/mp_elasticity.py diff --git a/automatminer_dev/matbench/mp_gaps.py 
b/automatminer_dev/matbench/dataset_creation/mp_gaps.py similarity index 100% rename from automatminer_dev/matbench/mp_gaps.py rename to automatminer_dev/matbench/dataset_creation/mp_gaps.py diff --git a/automatminer_dev/matbench/phonons.py b/automatminer_dev/matbench/dataset_creation/phonons.py similarity index 100% rename from automatminer_dev/matbench/phonons.py rename to automatminer_dev/matbench/dataset_creation/phonons.py diff --git a/automatminer_dev/matbench/steels.py b/automatminer_dev/matbench/dataset_creation/steels.py similarity index 91% rename from automatminer_dev/matbench/steels.py rename to automatminer_dev/matbench/dataset_creation/steels.py index bc0233cb..71a10114 100644 --- a/automatminer_dev/matbench/steels.py +++ b/automatminer_dev/matbench/dataset_creation/steels.py @@ -8,6 +8,8 @@ from matminer.datasets.dataset_retrieval import load_dataset + +# Note the units are in MPa, NOT GPa if __name__ == "__main__": df = load_dataset("steel_strength") df = df[["formula", "yield strength"]] diff --git a/automatminer_dev/matbench/docs/__init__.py b/automatminer_dev/matbench/docs/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/automatminer_dev/matbench/get_info.py b/automatminer_dev/matbench/docs/formatting_matbench_table.py similarity index 87% rename from automatminer_dev/matbench/get_info.py rename to automatminer_dev/matbench/docs/formatting_matbench_table.py index 8a14ecfe..bd1ee747 100644 --- a/automatminer_dev/matbench/get_info.py +++ b/automatminer_dev/matbench/docs/formatting_matbench_table.py @@ -1,4 +1,12 @@ from matminer.datasets.dataset_retrieval import load_dataset, get_available_datasets, get_all_dataset_info + + +''' + +Helper function to format matbench documentation page. 
+''' + + datasets = get_available_datasets(print_format=None) for dataset in datasets: diff --git a/automatminer_dev/matbench/mpcontribs/__init__.py b/automatminer_dev/matbench/mpcontribs/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/automatminer_dev/matbench/mpcontribs/upload.py b/automatminer_dev/matbench/mpcontribs/upload.py new file mode 100644 index 00000000..bca132a7 --- /dev/null +++ b/automatminer_dev/matbench/mpcontribs/upload.py @@ -0,0 +1,219 @@ +import wget, json, os, math +from string import capwords +from pybtex.database import parse_string +import pybtex.errors +from mpcontribs.client import Client +from pymatgen import MPRester, Structure +import tqdm +import pprint + +# from matminer.datasets.dataset_retrieval import ( +# get_all_dataset_info, +# get_available_datasets, +# load_dataset, +# ) + +from matminer.datasets import load_dataset + +from automatminer_dev.config import DIELECTRIC, JDFT2D, PEROVSKITES, STEELS, BENCHMARK_FULL_SET, BENCHMARK_DICT, HAS_STRUCTURE + + +pybtex.errors.set_strict_mode(False) +api_key = os.environ["MPCONTRIBS_API_KEY"] +client = Client(api_key, host='ml-api.materialsproject.cloud') +mprester = MPRester() + + +# client.get_project("matbench_steels").pretty() + + +fn = 'dataset_metadata.json' +if not os.path.exists(fn): + wget.download(f'https://raw.githubusercontent.com/hackingmaterials/matminer/master/matminer/datasets/{fn}') +metadata = json.load(open(fn, 'r')) +metadata = {k: d for k, d in metadata.items() if "matbench" in k} + + + +# Creating new projects +####################### +# todo: might not have access to add new projects +# for name, info in metadata.items(): +# +# print(f"Uploading {name}") +# +# columns = {} +# for col, text in info['columns'].items(): +# k = col.replace('_', '|').replace('-', '|').replace('(', ' ').replace( +# ')', '') +# columns[k] = text +# +# project = { +# 'is_public': True, +# 'owner': 'ardunn@lbl.gov', +# "name": name, +# 'title': name, # TODO update and 
set long_title +# 'authors': 'A. Dunn, A. Jain', +# 'description': info['description'], +# 'other': { +# 'columns': columns, +# 'entries': info['num_entries'] +# }, +# 'references': [] +# } +# +# for ref in info['bibtex_refs']: +# +# if name == "matbench_phonons": +# ref = ref.replace( +# "petretto_dwaraknath_miranda_winston_giantomassi_rignanese_van setten_gonze_persson_hautier_2018", +# "petretto2018") +# +# bib = parse_string(ref, 'bibtex') +# for key, entry in bib.entries.items(): +# key_is_doi = key.startswith('doi:') +# url = 'https://doi.org/' + key.split(':', 1)[ +# -1] if key_is_doi else entry.fields.get('url') +# k = 'Zhuo2018' if key_is_doi else capwords(key.replace('_', '')) +# if k.startswith('C2'): +# k = 'Castelli2012' +# elif k.startswith('Landolt'): +# k = 'LB1997' +# elif k == 'Citrine': +# url = 'https://www.citrination.com' +# +# if len(k) > 8: +# k = k[:4] + k[-4:] +# project['references'].append({"label": k, "url": url}) +# +# try: +# print(client.projects.create_entry(project=project).result()) +# except Exception as ex: +# print( +# ex) # TODO should use get_entry to check existence -> use update_entry if project exists + + + + + +# Map of canonical yet non-mpcontribs-compatible target names to compatible (unicode, no punctuation) target names +target_map = { + "yield strength": "σᵧ", + "log10(K_VRH)": "log₁₀Kᵛʳʰ", + "log10(G_VRH)": "log₁₀Gᵛʳʰ", + "n": "𝑛", + "exfoliation_en": "Eˣ", + "gap pbe": "Eᵍ", + "is_metal": "metallic", + "e_form": "Eᶠ", + "gfa": "glass", + "gap expt": "Eᵍ", + "last phdos peak": "ωᵐᵃˣ", +} + + +# # Getting project-level metadata in order +# ######################################### +# +# # Add warning to mpcontribs since the results will be stored out of order. 
+# # Also, fix columns for new mpcontribs deployment +# for name, info in metadata.items(): +# mb_shortname = name.replace("matbench_", "") +# +# description = info["description"] + f" If you are viewing this on MPContribs-ML interactively, please ensure the order of the identifiers is sequential (mb-{mb_shortname}-0001, mb-{mb_shortname}-0002, etc.) before benchmarking." +# if "For benchmarking" not in description: +# print(name, description) +# +# has_structure = mb_shortname in [ds["name"] for ds in HAS_STRUCTURE] +# primitive_key = "structure" if has_structure else "composition" +# target = BENCHMARK_DICT[mb_shortname]["target"] +# +# print(client.projects.update_entry( +# pk=name, +# project={ +# "description": description, +# 'other.columns': { +# target_map[target]: metadata[name]["columns"][target], +# primitive_key: metadata[name]["columns"][primitive_key] +# } +# }).result()) + + + + +# Entering all contributions to projects +######################################## + + +# steels.........X +# log_kvrh....... +# log_gvrh....... +# dielectric..... +# jdft2d.........X +# expt_gap.......X +# expt_is_metal..X +# phonons........ +# mp_is_metal.... +# mp_gap......... +# glass..........X +# mp_e_form...... +# perovskites.... 
+ + +for ds in ["dielectric", "phonons", "mp_gap", "mp_is_metal", "perovskites", "mp_e_form"]: + + ds_config = BENCHMARK_DICT[ds] + + name = "matbench_" + ds_config["name"] + print(f"Loading {name}") + df = load_dataset(name) + target = ds_config["target"] + unit = f" {ds_config['unit']}" if ds_config["unit"] else "" + + + # print(f"Updating 'other' column entries of {name} with unicode.") + # print(client.projects.update_entry(pk=name, project={ + # 'other.columns': { + # target_map[target]: metadata[name]["columns"][target], + # "structure": metadata[name]["columns"]["structure"] + # # "composition": metadata[name]["columns"]["composition"] + # } + # }).result()) + + + + # print(f"Deleting contributions of {name}") + # client.delete_contributions(name) + + + print(f"Assembling and uploading contributions for {name}") + structure_filename = "/Users/ardunn/Downloads/outfile.cif" + contributions = [] + id_prefix = df.shape[0] + + + id_n_zeros = math.floor(math.log(df.shape[0], 10)) + 1 + for i, row in tqdm.tqdm(enumerate(df.iterrows())): + entry = row[1] + contrib = {'project': name, 'is_public': True} + + if "structure" in entry.index: + structures = [] + s = entry.loc["structure"] + s.to("cif", structure_filename) + s = Structure.from_file(structure_filename) + c = s.composition.get_integer_formula_and_factor()[0] + contrib["structures"] = [s] + + else: + c = entry["composition"] + + id_number = f"{i+1:0{id_n_zeros}d}" + identifier = f"mb-{ds_config['name']}-{id_number}" + contrib["identifier"] = identifier + + contrib["data"] = {target_map[target]: f"{entry.loc[target]}{unit}"} + contrib["formula"] = c + contributions.append(contrib) + + client.submit_contributions(contributions, per_page=10) \ No newline at end of file diff --git a/docs/_sources/datasets.rst.txt b/docs/_sources/datasets.rst.txt index 18262318..6777e5b1 100644 --- a/docs/_sources/datasets.rst.txt +++ b/docs/_sources/datasets.rst.txt @@ -195,7 +195,7 @@ procedures, etc.) 
on a dataset with :code:`matminer.datasets.get_all_dataset_inf Description: Matbench v0.1 dataset for predicting steel yield strengths from chemical composition alone. Retrieved from Citrine informatics. Deduplicated. Columns: composition: Chemical formula. - yield strength: Target variable. Experimentally measured steel yield strengths, in GPa. + yield strength: Target variable. Experimentally measured steel yield strengths, in MPa. Num Entries: 312 Reference: https://citrination.com/datasets/153092/ Bibtex citations: ['@misc{Citrine Informatics,\ntitle = {Mechanical properties of some steels},\nhowpublished = {\\url{https://citrination.com/datasets/153092/},\n}'] diff --git a/docs/_sources/index.rst.txt b/docs/_sources/index.rst.txt index e0cb7bf4..99c9eb2c 100644 --- a/docs/_sources/index.rst.txt +++ b/docs/_sources/index.rst.txt @@ -150,7 +150,7 @@ Want to see something added or changed? Some ways to get involved are: - Contribute code! You can do this by forking `Automatminer on Github `_ and submitting a pull request. -- Post to our `support forum `_. Don't be shy, we look forward to feedback! +- Post to our `support forum `_. Don't be shy, we look forward to feedback! See our `contribution guidelines `_ diff --git a/docs/datasets.html b/docs/datasets.html index d1522424..61412fb2 100644 --- a/docs/datasets.html +++ b/docs/datasets.html @@ -292,7 +292,7 @@

Getting dataset infoContributing / Contact / SupportAutomatminer on Github and submitting a pull request.

-
  • Post to our support forum. Don’t be shy, we look forward to feedback!

  • +
  • Post to our support forum. Don’t be shy, we look forward to feedback!

  • See our contribution guidelines for more inspect. For a list of contributors, see our diff --git a/docs/source/datasets.rst b/docs/source/datasets.rst index 18262318..6777e5b1 100644 --- a/docs/source/datasets.rst +++ b/docs/source/datasets.rst @@ -195,7 +195,7 @@ procedures, etc.) on a dataset with :code:`matminer.datasets.get_all_dataset_inf Description: Matbench v0.1 dataset for predicting steel yield strengths from chemical composition alone. Retrieved from Citrine informatics. Deduplicated. Columns: composition: Chemical formula. - yield strength: Target variable. Experimentally measured steel yield strengths, in GPa. + yield strength: Target variable. Experimentally measured steel yield strengths, in MPa. Num Entries: 312 Reference: https://citrination.com/datasets/153092/ Bibtex citations: ['@misc{Citrine Informatics,\ntitle = {Mechanical properties of some steels},\nhowpublished = {\\url{https://citrination.com/datasets/153092/},\n}'] diff --git a/docs/source/index.rst b/docs/source/index.rst index e0cb7bf4..99c9eb2c 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -150,7 +150,7 @@ Want to see something added or changed? Some ways to get involved are: - Contribute code! You can do this by forking `Automatminer on Github `_ and submitting a pull request. -- Post to our `support forum `_. Don't be shy, we look forward to feedback! +- Post to our `support forum `_. Don't be shy, we look forward to feedback! 
See our `contribution guidelines `_ diff --git a/requirements.txt b/requirements.txt index 3c1198a6..5b8ce393 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,5 @@ # Direct requirements of this project alone matminer==0.6.2 -pymatgen==2020.01.28 tpot==0.11.0 skrebate==0.6 pyyaml==5.1.2 diff --git a/requirements_dev.txt b/requirements_dev.txt index 19506027..37ca2ad9 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -7,3 +7,5 @@ isort==4.3.21 pre-commit==1.18.3 paramiko==2.6.0 scp==0.13.2 +mpcontribs-client==3.3.0 +wget==3.2 \ No newline at end of file