Skip to content

Commit

Permalink
minor punctuation
Browse files Browse the repository at this point in the history
  • Loading branch information
Anton Kulaga committed Aug 3, 2022
1 parent 9bd42a4 commit 37a1d90
Show file tree
Hide file tree
Showing 7 changed files with 12 additions and 16 deletions.
2 changes: 1 addition & 1 deletion .dvc/config
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[core]
remote = public
['remote "species"']
url = gdrive://0APuU2w04mc7qUk9PVA/repositgiories/species
url = gdrive://0APuU2w04mc7qUk9PVA/repositories/species
['remote "public"']
url = gdrive://1fwpl8eUsyfZUmeO_sRg0u-nmvXxX7kiS
7 changes: 3 additions & 4 deletions yspecies/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import pandas as pd
from dataclasses import dataclass


class ExpressionDataset:
'''
ExpressionDataset class to handle: samples, species, genes and expressions
Expand Down Expand Up @@ -137,7 +138,6 @@ def get_label(self, label: str) -> pd.DataFrame:
else:
assert label in self.genes.columns.to_list(), f"cannot find label {label} anywhere!"


def extended_samples(self, samples_columns: List[str] = None, species_columns: List[str] = None):
'''
Merges samples with species dataframes
Expand Down Expand Up @@ -280,6 +280,7 @@ def collect(self, collect_fun: Callable[[pd.DataFrame], pd.DataFrame]) -> 'Expre
def min_max_trait(self, trait: str) -> List:
    """
    Looks up the extremes of one column of the species table.
    :param trait: name of the column in self.species to inspect
    :return: two-element list: index label of the minimum value, then index label of the maximum value
    """
    trait_values = self.species[trait]
    lowest_label = trait_values.idxmin()
    highest_label = trait_values.idxmax()
    return [lowest_label, highest_label]


@dataclass(frozen=True)
class SpeciesIndexes:
"""
Expand Down Expand Up @@ -321,7 +322,6 @@ def collect(self, collect_fun: Callable[[pd.DataFrame], pd.DataFrame]):
return ExpressionDataset(self.dataset.name, upd_expressions, upd_samples, upd_species, upd_genes, upd_genes_meta)



@dataclass(frozen=True)
class SamplesIndexes:

Expand All @@ -345,7 +345,6 @@ def collect(self, filter_fun: Callable[[pd.DataFrame], pd.DataFrame]) -> Express
#upd_expressions = upd_expressions.reindex(upd_samples.index)
return ExpressionDataset(self.dataset.name, upd_expressions, upd_samples, upd_species, upd_genes, upd_genes_meta)


def filter(self, filter_fun: Callable[[pd.DataFrame], pd.DataFrame]) -> ExpressionDataset:
'''
Function to filter DataSet samples (and filter related data in expressionda dataframe) according to the lambda provided
Expand All @@ -354,7 +353,6 @@ def filter(self, filter_fun: Callable[[pd.DataFrame], pd.DataFrame]) -> Expressi
'''
return self.collect(lambda df: self.dataset.samples[filter_fun(df)])


def __getitem__(self, item) -> ExpressionDataset:
'''
Samples index function
Expand All @@ -378,6 +376,7 @@ def _repr_html_(self):
f"<tr><td>{str(self.dataset.samples.shape[0])}</td></tr>" \
f"</table>"


@dataclass(frozen=True)
class GenesIndexes:

Expand Down
9 changes: 3 additions & 6 deletions yspecies/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@ class BasicMetrics:
huber: float

@staticmethod
def from_dict(dict: Dict, row: int = None):
    """
    Builds BasicMetrics from a dictionary with "l1", "l2" and "huber" entries.

    Previously two static methods shared the name from_dict, so the second definition
    replaced the first and the one-argument form could never be called. Both forms are
    served by this single function now; the parameter names of the surviving
    (two-argument) definition are kept so existing callers are unaffected.

    :param dict: mapping with the keys "l1", "l2" and "huber"; the values are used directly
                 when row is None, otherwise each value is indexed with row
    :param row: optional position to pick from each of the three entries
    :return: BasicMetrics constructed from the l1, l2 and huber values (in that order)
    """
    if row is None:
        return BasicMetrics(dict["l1"], dict["l2"], dict["huber"])
    return BasicMetrics(dict["l1"][row], dict["l2"][row], dict["huber"][row])

@staticmethod
Expand Down Expand Up @@ -91,7 +91,6 @@ def take_best(results: List['ResultsCV'], metrics: str = "huber", last: bool = F
result = value if result is None or value < result else result
return result


@cached_property
def keys(self):
    """Names under which self.evaluation stores its entries, as a list (computed once, then cached)."""
    evaluation_names = self.evaluation.keys()
    return [name for name in evaluation_names]
Expand All @@ -104,11 +103,9 @@ def mins(self):
def latest(self):
    """
    Collects the final entry of every evaluation series.
    :return: dictionary mapping each key in self.keys to the last element of self.evaluation[key]
    """
    final_values = {}
    for key in self.keys:
        series = np.array(self.evaluation[key])
        final_values[key] = series[-1]
    return final_values


def min(self, metrics: str) -> float:
    """
    Reads one entry of self.mins.
    :param metrics: name of the entry to read
    :return: self.mins[metrics] when that key exists, otherwise the entry stored under metrics + "-mean"
    """
    if metrics not in self.mins:
        # no exact entry: fall back to the "-mean" variant of the name
        return self.mins[metrics + "-mean"]
    return self.mins[metrics]


def last(self, metrics: str) -> float:
    """
    Reads one entry of self.latest.
    :param metrics: name of the entry to read
    :return: self.latest[metrics] when that key exists, otherwise the entry stored under metrics + "-mean"
    """
    # pick the exact name when present, else its "-mean" variant
    lookup_key = metrics if metrics in self.latest else metrics + "-mean"
    return self.latest[lookup_key]

Expand Down
1 change: 1 addition & 0 deletions yspecies/partition.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ def fit(self, X, y=None) -> 'DataPartitioner':

def transform(self, for_partition: Tuple[EncodedFeatures, PartitionParameters]) -> ExpressionPartitions:
'''
:param for_partition:
:param data: ExpressionDataset
:param k: number of k-folds in sorted stratification
:return: partitions
Expand Down
2 changes: 0 additions & 2 deletions yspecies/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,6 @@ def _repr_html_(self):
f"</table>"




class EncodedFeatures:

def __init__(self, features: FeatureSelection, samples: pd.DataFrame, genes_meta: pd.DataFrame = None):
Expand Down
5 changes: 3 additions & 2 deletions yspecies/selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ def _repr_html_(self):
f"<tr><td>{self.metrics}</td><td>str({self.validation_species})</td><td>{str(self.shap_dataframe.shape)}</td><td>{str(self.shap_absolute_sum_non_zero.shape)}</td><td>{self.eval_metrics}</td></tr>" \
f"</table>"


@dataclass
class CrossValidator(TransformerMixin):
early_stopping_rounds: int = 10
Expand Down Expand Up @@ -166,7 +167,7 @@ def fit(self, to_fit: Tuple[ExpressionPartitions, Dict], y=None) -> 'CrossValida
return self

def regression_model(self, X_train, X_test, y_train, y_test, parameters: Dict, categorical=None,
num_boost_round: int = 250, seed: int = None) -> Booster:
num_boost_round: int = 250, seed: int = None) -> [Booster, list[BasicMetrics]]:
'''
trains a regression model
:param X_train:
Expand All @@ -188,7 +189,7 @@ def regression_model(self, X_train, X_test, y_train, y_test, parameters: Dict, c
gbm = lgb.train(parameters,
lgb_train,
num_boost_round=num_boost_round,
valid_sets=lgb_eval,
valid_sets=[lgb_eval],
evals_result=evals_result,
verbose_eval=num_boost_round,
callbacks=[stopping_callback]
Expand Down
2 changes: 1 addition & 1 deletion yspecies/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def fit(self, X, y=None):
def transform(self, data: Iterable) -> Any:
    """
    Keeps the elements accepted by self.filter and hands them to self.fold.
    :param data: iterable of elements to process
    :return: whatever self.fold returns for the list of accepted elements
    """
    accepted = []
    for element in data:
        if self.filter(element):
            accepted.append(element)
    return self.fold(accepted)


@dataclass(frozen=True)
class Repeat(TransformerMixin):
transformer: Union[TransformerMixin, Pipeline]
Expand All @@ -86,7 +87,6 @@ class TupleWith(TransformerMixin):
map_left: Callable[[Any], Any] = field(default_factory=lambda: lambda x: x)
map_right: Callable[[Any], Any] = field(default_factory=lambda: lambda x: x)


def fit(self, X, y=None):
    """
    No-op fit: both arguments are ignored.
    :param X: unused
    :param y: unused
    :return: this same object, unchanged
    """
    return self

Expand Down

0 comments on commit 37a1d90

Please sign in to comment.