Skip to content

Commit

Permalink
ENH: Do not abort fast-sigma computations if a crash is encountered f…
Browse files Browse the repository at this point in the history
…or a single property
  • Loading branch information
Bas van Beek committed Jun 7, 2022
1 parent 1f88974 commit 0d7ceb6
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 108 deletions.
174 changes: 68 additions & 106 deletions nanoCAT/recipes/fast_sigma.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def get_compkf(
smiles: str,
directory: None | str | os.PathLike[str] = None,
name: None | str = None,
) -> None | str:
) -> str:
"""Estimate the sigma profile of a SMILES string using the COSMO-RS fast-sigma method.
See the COSMO-RS `docs <https://www.scm.com/doc/COSMO-RS/Fast_Sigma_QSPR_COSMO_sigma-profiles.html>`_ for more details.
Expand Down Expand Up @@ -148,14 +148,14 @@ def get_compkf(

command = f'"$AMSBIN"/fast_sigma --smiles "{smiles}" -o "{kf_file}"'
output = _run(command, smiles, err_msg="Failed to compute the sigma profile of {!r}")
return kf_file if output is not None else None
return kf_file


def get_fast_sigma_properties(
smiles: str,
directory: None | str | os.PathLike[str] = None,
name: None | str = None,
) -> None | str:
) -> None:
"""Calculate various pure-compound properties with the COSMO-RS property prediction program.
See the COSMO-RS `docs <https://www.scm.com/doc/COSMO-RS/Property_Prediction.html>`_ for more details.
Expand All @@ -171,43 +171,34 @@ def get_fast_sigma_properties(
The name of the to-be created .compkf file (excluding extensions).
If :data:`None`, use **smiles**.
Returns
-------
:class:`str`, optional
The absolute path to the created ``.compkf`` file.
:data:`None` will be returned if an error is raised by AMS.
""" # noqa: E501
filename = smiles if name is None else name
if directory is None:
directory = os.getcwd()
kf_file = os.path.join(directory, f'{filename}.compkf')

command = f'"$AMSBIN"/prop_prediction --smiles "{smiles}" -o "{kf_file}"'
output = _run(
_run(
command, smiles,
err_msg="Failed to compute the pure compound properties of {!r}",
)
return kf_file if output is not None else None


def _run(command: str, smiles: str, err_msg: str) -> None | subprocess.CompletedProcess[bytes]:
def _run(command: str, smiles: str, err_msg: str) -> None | subprocess.CompletedProcess[str]:
"""Run **command** and return the the status."""
try:
status = subprocess.run(command, shell=True, check=True, capture_output=True)
status = subprocess.run(command, shell=True, check=True, text=True, capture_output=True)
stderr = status.stderr
stdout = status.stdout
if stderr:
raise RuntimeError(stderr.decode())
elif b"WARNING" in stdout:
raise RuntimeError(stdout.decode().strip("\n").rstrip("\n"))
raise RuntimeError(stderr.strip())
elif "WARNING" in stdout:
raise RuntimeError(stdout.strip())
except (RuntimeError, subprocess.SubprocessError) as ex:
warn = RuntimeWarning(err_msg.format(smiles))
warn.__cause__ = ex
warnings.warn(warn, stacklevel=1)
return None
else:
return status
return status


def _hash_smiles(smiles: str) -> str:
Expand All @@ -218,27 +209,24 @@ def _hash_smiles(smiles: str) -> str:
def _get_compkf(
smiles_iter: Iterable[str],
directory: str | os.PathLike[str],
) -> _NDArray[np.object_]:
) -> list[str]:
"""Wrap :func:`get_compkf` in a for-loop."""
lst = [get_compkf(smiles, directory, name=_hash_smiles(smiles)) for smiles in smiles_iter]
return np.array(lst, dtype=np.object_)
return [get_compkf(smiles, directory, name=_hash_smiles(smiles)) for smiles in smiles_iter]


def _get_fast_sigma_properties(
smiles_iter: Iterable[str],
directory: str | os.PathLike[str],
) -> _NDArray[np.object_]:
) -> None:
"""Wrap :func:`get_fast_sigma_properties` in a for-loop."""
lst = [get_fast_sigma_properties(smiles, directory, name=_hash_smiles(smiles)) for
smiles in smiles_iter]
return np.array(lst, dtype=np.object_)
for smiles in smiles_iter:
get_fast_sigma_properties(smiles, directory, name=_hash_smiles(smiles))


def _set_properties(
df: pd.DataFrame,
solutes: _NDArray[np.object_],
solutes: list[str],
solvents: Mapping[str, str],
prop_mask: _NDArray[np.bool_],
) -> None:
df["LogP", None] = _get_logp(solutes)

Expand All @@ -248,116 +236,90 @@ def _set_properties(
("Solvation Energy", name),
]] = _get_gamma_e(solutes, solv, name)

prop_array = _get_compkf_prop(solutes, prop_mask)
prop_array = _get_compkf_prop(solutes)
iterator = ((k, prop_array[k]) for k in prop_array.dtype.fields)
for k, v in iterator:
df[k, None] = v


def _get_compkf_prop(
solutes: _NDArray[np.object_],
prop_mask: _NDArray[np.bool_],
) -> _NDArray[np.void]:
def _get_compkf_prop(solutes: list[str]) -> _NDArray[np.void]:
"""Extract all (potentially) interesting properties from the compkf file."""
prop_iter: list[tuple[str, str, type | str]] = [
("Compound Data", "Formula", "U160"),
("Compound Data", "Molar Mass", np.float64),
("Compound Data", "Nring", np.int64),
("PROPPREDICTION", "boilingpoint", np.float64),
("PROPPREDICTION", "criticalpressure", np.float64),
("PROPPREDICTION", "criticaltemp", np.float64),
("PROPPREDICTION", "criticalvol", np.float64),
("PROPPREDICTION", "density", np.float64),
("PROPPREDICTION", "dielectricconstant", np.float64),
("PROPPREDICTION", "entropygas", np.float64),
("PROPPREDICTION", "flashpoint", np.float64),
("PROPPREDICTION", "gidealgas", np.float64),
("PROPPREDICTION", "hcombust", np.float64),
("PROPPREDICTION", "hformstd", np.float64),
("PROPPREDICTION", "hfusion", np.float64),
("PROPPREDICTION", "hidealgas", np.float64),
("PROPPREDICTION", "hsublimation", np.float64),
("PROPPREDICTION", "meltingpoint", np.float64),
("PROPPREDICTION", "molarvol", np.float64),
("PROPPREDICTION", "parachor", np.float64),
("PROPPREDICTION", "solubilityparam", np.float64),
("PROPPREDICTION", "tpt", np.float64),
("PROPPREDICTION", "vdwarea", np.float64),
("PROPPREDICTION", "vdwvol", np.float64),
("PROPPREDICTION", "vaporpressure", np.float64),
prop_iter: list[tuple[str, Any, str, type | str]] = [
("Compound Data", "", "Formula", "U160"),
("Compound Data", np.nan, "Molar Mass", np.float64),
("Compound Data", 0, "Nring", np.int64),
("PROPPREDICTION", np.nan, "boilingpoint", np.float64),
("PROPPREDICTION", np.nan, "criticalpressure", np.float64),
("PROPPREDICTION", np.nan, "criticaltemp", np.float64),
("PROPPREDICTION", np.nan, "criticalvol", np.float64),
("PROPPREDICTION", np.nan, "density", np.float64),
("PROPPREDICTION", np.nan, "dielectricconstant", np.float64),
("PROPPREDICTION", np.nan, "entropygas", np.float64),
("PROPPREDICTION", np.nan, "flashpoint", np.float64),
("PROPPREDICTION", np.nan, "gidealgas", np.float64),
("PROPPREDICTION", np.nan, "hcombust", np.float64),
("PROPPREDICTION", np.nan, "hformstd", np.float64),
("PROPPREDICTION", np.nan, "hfusion", np.float64),
("PROPPREDICTION", np.nan, "hidealgas", np.float64),
("PROPPREDICTION", np.nan, "hsublimation", np.float64),
("PROPPREDICTION", np.nan, "meltingpoint", np.float64),
("PROPPREDICTION", np.nan, "molarvol", np.float64),
("PROPPREDICTION", np.nan, "parachor", np.float64),
("PROPPREDICTION", np.nan, "solubilityparam", np.float64),
("PROPPREDICTION", np.nan, "tpt", np.float64),
("PROPPREDICTION", np.nan, "vdwarea", np.float64),
("PROPPREDICTION", np.nan, "vdwvol", np.float64),
("PROPPREDICTION", np.nan, "vaporpressure", np.float64),
]

dtype = np.dtype([i[1:] for i in prop_iter])
fill_value = np.array(tuple(
(np.nan if field_dtype == np.float64 else np.dtype(field_dtype).type())
for *_, field_dtype in prop_iter
), dtype=dtype)
ret = np.full_like(solutes, fill_value, dtype=dtype)

mask = prop_mask & (solutes != None)
if not mask.any():
return ret
dtype = np.dtype([i[2:] for i in prop_iter])
fill_value = np.array(tuple(fill for _, fill, _, field_dtype in prop_iter), dtype=dtype)
ret = np.full(len(solutes), fill_value, dtype=dtype)

iterator = ((i, KFFile(f), f) for i, (f, m) in enumerate(zip(solutes, mask)) if m)
iterator = ((i, KFFile(f), f) for i, f in enumerate(solutes))
for i, kf, file in iterator: # type: int, KFFile, str
for section, variable, _ in prop_iter:
if kf.reader is None:
warn = RuntimeWarning(f"No such file or directory: {file!r}")
continue

for section, _, variable, _ in prop_iter:
try:
ret[variable][i] = kf.read(section, variable)
except Exception as ex:
if kf.reader is None:
warn = RuntimeWarning(f"No such file or directory: {file!r}")
else:
smiles = kf.read("Compound Data", "SMILES").strip("\x00")
warn = RuntimeWarning(
f'Failed to extract the "{section}%{variable}" property of {smiles!r}'
)
smiles = kf.read("Compound Data", "SMILES").strip("\x00")
warn = RuntimeWarning(
f'Failed to extract the "{section}%{variable}" property of {smiles!r}'
)
warn.__cause__ = ex
warnings.warn(warn)
return ret


def _get_logp(solutes: _NDArray[np.object_]) -> _NDArray[np.float64]:
def _get_logp(solutes: list[str]) -> _NDArray[np.float64]:
"""Perform a LogP calculation."""
ret = np.full_like(solutes, np.nan, dtype=np.float64)
mask = solutes != None
count = np.count_nonzero(mask)
if count == 0:
return ret

s = copy.deepcopy(LOGP_SETTINGS)
for v in s.input.compound[:2]:
v._h = v._h.format(os.environ["AMSRESOURCES"])
s.input.compound += [Settings({"_h": f'"{sol}"', "_1": "compkffile"}) for sol in solutes[mask]]

ret[mask] = _run_crs(
s, count,
logp=lambda r: r.readkf('LOGP', 'logp')[2:],
s.input.compound += [Settings({"_h": f'"{sol}"', "_1": "compkffile"}) for sol in solutes]
return _run_crs(
s, len(solutes), logp=lambda r: r.readkf('LOGP', 'logp')[2:],
)
return ret


def _get_gamma_e(
solutes: _NDArray[np.object_],
solutes: list[str],
solvent: str,
solvent_name: str,
) -> _NDArray[np.float64]:
"""Perform an activity coefficient and solvation energy calculation."""
ret = np.full((len(solutes), 2), np.nan, dtype=np.float64)
mask = solutes != None
count = np.count_nonzero(mask)
if count == 0:
return ret

s = copy.deepcopy(GAMMA_E_SETTINGS)
s.input.compound[0]._h = f'"{solvent}"'
s.input.compound += [Settings({"_h": f'"{sol}"', "_1": "compkffile"}) for sol in solutes[mask]]

ret[mask] = _run_crs(
s, count, solvent_name,
s.input.compound += [Settings({"_h": f'"{sol}"', "_1": "compkffile"}) for sol in solutes]
return _run_crs(
s, len(solutes), solvent_name,
activity_coefficient=lambda r: r.readkf('ACTIVITYCOEF', 'gamma')[1:],
solvation_energy=lambda r: r.readkf('ACTIVITYCOEF', 'deltag')[1:],
)
return ret


def _run_crs(
Expand Down Expand Up @@ -433,9 +395,9 @@ def _inner_loop(
config.log.update(log)
config.job.pickle = False

compkf_array = _get_compkf(index, workdir)
prop_mask: _NDArray[np.bool_] = _get_fast_sigma_properties(index, workdir) != None
_set_properties(df, compkf_array, solvents, prop_mask)
compkf_list = _get_compkf(index, workdir)
_get_fast_sigma_properties(index, workdir)
_set_properties(df, compkf_list, solvents)

df.sort_index(axis=1, inplace=True)
df.to_csv(df_filename)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_fast_sigma_recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def compare_df(df1: pd.DataFrame, df2: pd.DataFrame) -> None:
np.testing.assert_array_equal(df1.columns, df2.columns, err_msg="columns")
np.testing.assert_array_equal(df1.index, df2.index, err_msg="index")

iterator = ((k, df1[k], df2[k]) for k in df1.keys())
iterator = ((repr(k), df1[k], df2[k]) for k in df1.keys())
for k, v1, v2 in iterator:
if issubclass(v2.dtype.type, np.inexact):
np.testing.assert_allclose(v1, v2, err_msg=k)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_files/cosmo-rs.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ solvent,octanol,water,nan,nan,nan,nan,octanol,water,nan,nan,nan,nan,nan,nan,nan,
smiles,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
CCCO[H],0.9059515885100106,47.502560265094864,C3H8O,0.8598639266703433,60.057514876,0,-4.907177123884496,-3.779866847103011,364.396484375,51.321434020996094,483.5383605957031,0.20692114531993866,0.7684714794158936,11.351513862609863,319.4388122558594,301.9580993652344,-153.78858947753906,-1855.157958984375,-300.0579528808594,9.660120964050293,-253.54257202148438,60.9822998046875,213.6103973388672,0.07815191149711609,169.25833129882812,10.308341026306152,212.8401641845703,0.030887117233704114,103.73080444335938,69.73824310302734
CCO[H],0.9809559977875152,12.735229092215517,C2H6O,0.27727498634872677,46.041864812,0,-4.184213833748828,-3.8839858460959533,337.62591552734375,60.94455337524414,442.0007629394531,0.1411868929862976,0.7520784139633179,11.484953880310059,279.29022216796875,287.29193115234375,-161.09495544433594,-1247.503662109375,-271.9477844238281,6.711687088012695,-232.44967651367188,54.316436767578125,207.76678466796875,0.06121950224041939,129.3246612548828,10.367997169494629,206.98556518554688,0.09977969105921916,81.28819274902344,52.81121063232422
CO[H],1.0458910400849082,4.954782210274083,,-0.1431131720223538,,0,-2.9773539103786146,-3.2744200551256926,,,,,,,,,,,,,,,,,,,,,,
CO[H],1.0458910361889755,4.9547821918156725,CH4O,-0.1431131734865134,32.026214748,0,-2.977353912585617,-3.2744200573329225,331.949462890625,77.95111846923828,400.227294921875,0.0,0.6669917702674866,8.475790023803711,214.83251953125,284.8525390625,-79.22380828857422,-798.9734497070312,-177.14820861816406,6.536450386047363,-140.56654357910156,47.624053955078125,245.000244140625,0.0480159074068069,100.24918365478516,11.353303909301758,244.3812713623047,0.253253090460226,59.46268844604492,37.41834259033203

0 comments on commit 0d7ceb6

Please sign in to comment.