minor punctuation

antonkulaga · Aug 3, 2022 · 37a1d90 · 37a1d90
1 parent 9bd42a4
commit 37a1d90
Show file tree

Hide file tree

Showing 7 changed files with 12 additions and 16 deletions.
diff --git a/.dvc/config b/.dvc/config
@@ -1,6 +1,6 @@
 [core]
     remote = public
 ['remote "species"']
-    url = gdrive://0APuU2w04mc7qUk9PVA/repositgiories/species
+    url = gdrive://0APuU2w04mc7qUk9PVA/repositories/species
 ['remote "public"']
     url = gdrive://1fwpl8eUsyfZUmeO_sRg0u-nmvXxX7kiS
diff --git a/yspecies/dataset.py b/yspecies/dataset.py
@@ -14,6 +14,7 @@
 import pandas as pd
 from dataclasses import dataclass
 
+
 class ExpressionDataset:
     '''
     ExpressionDataset class to handle: samples, species, genes and expressions
@@ -137,7 +138,6 @@ def get_label(self, label: str) -> pd.DataFrame:
         else:
             assert label in self.genes.columns.to_list(), f"cannot find label {label} anywhere!"
 
-
     def extended_samples(self, samples_columns: List[str] = None, species_columns: List[str] = None):
         '''
         Merges samples with species dataframes
@@ -280,6 +280,7 @@ def collect(self, collect_fun: Callable[[pd.DataFrame], pd.DataFrame]) -> 'Expre
     def min_max_trait(self, trait: str) -> List:
         return [self.species[trait].idxmin(), self.species[trait].idxmax()]
 
+
 @dataclass(frozen=True)
 class SpeciesIndexes:
     """
@@ -321,7 +322,6 @@ def collect(self, collect_fun: Callable[[pd.DataFrame], pd.DataFrame]):
         return ExpressionDataset(self.dataset.name, upd_expressions, upd_samples, upd_species,  upd_genes, upd_genes_meta)
 
 
-
 @dataclass(frozen=True)
 class SamplesIndexes:
 
@@ -345,7 +345,6 @@ def collect(self, filter_fun: Callable[[pd.DataFrame], pd.DataFrame]) -> Express
         #upd_expressions = upd_expressions.reindex(upd_samples.index)
         return ExpressionDataset(self.dataset.name, upd_expressions, upd_samples, upd_species,  upd_genes, upd_genes_meta)
 
-
     def filter(self, filter_fun: Callable[[pd.DataFrame], pd.DataFrame]) -> ExpressionDataset:
         '''
         Function to filter DataSet samples (and filter related data in expressionda dataframe) according to the lambda provided
@@ -354,7 +353,6 @@ def filter(self, filter_fun: Callable[[pd.DataFrame], pd.DataFrame]) -> Expressi
         '''
         return self.collect(lambda df: self.dataset.samples[filter_fun(df)])
 
-
     def __getitem__(self, item) -> ExpressionDataset:
         '''
         Samples index function
@@ -378,6 +376,7 @@ def _repr_html_(self):
                f"<tr><td>{str(self.dataset.samples.shape[0])}</td></tr>" \
                f"</table>"
 
+
 @dataclass(frozen=True)
 class GenesIndexes:
 

diff --git a/yspecies/models.py b/yspecies/models.py
@@ -15,11 +15,11 @@ class BasicMetrics:
     huber: float
 
     @staticmethod
-    def from_dict(dict: Dict):
-        return BasicMetrics(dict["l1"], dict["l2"], dict["huber"])
+    def from_dict(dic: Dict[str, float]):
+        return BasicMetrics(dic["l1"], dic["l2"], dic["huber"])
 
     @staticmethod
-    def from_dict(dict: Dict, row: int):
+    def from_dict(dict: Dict[Dict], row: int):
         return BasicMetrics(dict["l1"][row], dict["l2"][row], dict["huber"][row])
 
     @staticmethod
@@ -91,7 +91,6 @@ def take_best(results: List['ResultsCV'], metrics: str = "huber", last: bool = F
             result = value if result is None or value < result else result
         return result
 
-
     @cached_property
     def keys(self):
         return list(self.evaluation.keys())
@@ -104,11 +103,9 @@ def mins(self):
     def latest(self):
         return {k: (np.array(self.evaluation[k])[-1]) for k in self.keys}
 
-
     def min(self, metrics: str) -> float:
         return self.mins[metrics] if metrics in self.mins else self.mins[metrics+"-mean"]
 
-
     def last(self, metrics: str) -> float:
         return self.latest[metrics] if metrics in self.latest else self.latest[metrics+"-mean"]
 

diff --git a/yspecies/partition.py b/yspecies/partition.py
@@ -181,6 +181,7 @@ def fit(self, X, y=None) -> 'DataPartitioner':
 
     def transform(self, for_partition: Tuple[EncodedFeatures, PartitionParameters]) -> ExpressionPartitions:
         '''
+        :param for_partition:
         :param data: ExpressionDataset
         :param k: number of k-folds in sorted stratification
         :return: partitions

diff --git a/yspecies/preprocess.py b/yspecies/preprocess.py
@@ -55,8 +55,6 @@ def _repr_html_(self):
                f"</table>"
 
 
-
-
 class EncodedFeatures:
 
     def __init__(self, features: FeatureSelection, samples: pd.DataFrame, genes_meta: pd.DataFrame = None):

diff --git a/yspecies/selection.py b/yspecies/selection.py
@@ -137,6 +137,7 @@ def _repr_html_(self):
                f"<tr><td>{self.metrics}</td><td>str({self.validation_species})</td><td>{str(self.shap_dataframe.shape)}</td><td>{str(self.shap_absolute_sum_non_zero.shape)}</td><td>{self.eval_metrics}</td></tr>" \
                f"</table>"
 
+
 @dataclass
 class CrossValidator(TransformerMixin):
     early_stopping_rounds: int = 10
@@ -166,7 +167,7 @@ def fit(self, to_fit: Tuple[ExpressionPartitions, Dict], y=None) -> 'CrossValida
         return self
 
     def regression_model(self, X_train, X_test, y_train, y_test, parameters: Dict, categorical=None,
-                         num_boost_round: int = 250, seed: int = None) -> Booster:
+                         num_boost_round: int = 250, seed: int = None) -> [Booster, list[BasicMetrics]]:
         '''
         trains a regression model
         :param X_train:
@@ -188,7 +189,7 @@ def regression_model(self, X_train, X_test, y_train, y_test, parameters: Dict, c
         gbm = lgb.train(parameters,
                         lgb_train,
                         num_boost_round=num_boost_round,
-                        valid_sets=lgb_eval,
+                        valid_sets=[lgb_eval],
                         evals_result=evals_result,
                         verbose_eval=num_boost_round,
                         callbacks=[stopping_callback]

diff --git a/yspecies/workflow.py b/yspecies/workflow.py
@@ -60,6 +60,7 @@ def fit(self, X, y=None):
     def transform(self, data: Iterable) -> Any:
         return self.fold([d for d in data if self.filter(d)])
 
+
 @dataclass(frozen=True)
 class Repeat(TransformerMixin):
     transformer: Union[TransformerMixin, Pipeline]
@@ -86,7 +87,6 @@ class TupleWith(TransformerMixin):
     map_left: Callable[[Any], Any] = field(default_factory=lambda: lambda x: x)
     map_right: Callable[[Any], Any] = field(default_factory=lambda: lambda x: x)
 
-
     def fit(self, X, y = None):
         return self
Original file line number	Diff line number	Diff line change
Expand Up		@@ -55,8 +55,6 @@ def _repr_html_(self):
               f"</table>"
 
 
-
-
 class EncodedFeatures:
 
     def __init__(self, features: FeatureSelection, samples: pd.DataFrame, genes_meta: pd.DataFrame = None):
Expand Down