Skip to content

Commit

Permalink
Merge pull request #493 from MilesCranmer/heap-size-hint
Browse files Browse the repository at this point in the history
Add parameter for specifying `--heap-size-hint` on spawned Julia processes
  • Loading branch information
MilesCranmer authored Dec 24, 2023
2 parents 2ab2a53 + f2294b3 commit 5bf2e55
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 88 deletions.
1 change: 1 addition & 0 deletions pysr/param_groupings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
- procs
- multithreading
- cluster_manager
- heap_size_hint_in_bytes
- batching
- batch_size
- precision
Expand Down
183 changes: 98 additions & 85 deletions pysr/sr.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from io import StringIO
from multiprocessing import cpu_count
from pathlib import Path
from typing import List, Optional
from typing import Callable, Dict, List, Literal, Optional, Tuple, Union

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -171,7 +171,7 @@ def _check_assertions(


# Class validation constants
VALID_OPTIMIZER_ALGORITHMS = ["NelderMead", "BFGS"]
VALID_OPTIMIZER_ALGORITHMS = ["BFGS", "NelderMead"]


class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
Expand Down Expand Up @@ -455,6 +455,12 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
"htc". If set to one of these, PySR will run in distributed
mode, and use `procs` to figure out how many processes to launch.
Default is `None`.
heap_size_hint_in_bytes : int
For multiprocessing, this sets the `--heap-size-hint` parameter
for new Julia processes. This can be configured when using
multi-node distributed compute, to give each process a hint
about how much memory it can use before aggressive garbage
collection. Default is `None`.
batching : bool
Whether to compare population members on small batches during
evolution. Still uses full dataset for comparing against hall
Expand Down Expand Up @@ -653,89 +659,92 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):

def __init__(
self,
model_selection="best",
model_selection: Literal["best", "accuracy", "score"] = "best",
*,
binary_operators=None,
unary_operators=None,
niterations=40,
populations=15,
population_size=33,
max_evals=None,
maxsize=20,
maxdepth=None,
warmup_maxsize_by=0.0,
timeout_in_seconds=None,
constraints=None,
nested_constraints=None,
loss=None,
full_objective=None,
complexity_of_operators=None,
complexity_of_constants=1,
complexity_of_variables=1,
parsimony=0.0032,
dimensional_constraint_penalty=None,
use_frequency=True,
use_frequency_in_tournament=True,
adaptive_parsimony_scaling=20.0,
alpha=0.1,
annealing=False,
early_stop_condition=None,
ncyclesperiteration=550,
fraction_replaced=0.000364,
fraction_replaced_hof=0.035,
weight_add_node=0.79,
weight_insert_node=5.1,
weight_delete_node=1.7,
weight_do_nothing=0.21,
weight_mutate_constant=0.048,
weight_mutate_operator=0.47,
weight_randomize=0.00023,
weight_simplify=0.0020,
weight_optimize=0.0,
crossover_probability=0.066,
skip_mutation_failures=True,
migration=True,
hof_migration=True,
topn=12,
should_simplify=None,
should_optimize_constants=True,
optimizer_algorithm="BFGS",
optimizer_nrestarts=2,
optimize_probability=0.14,
optimizer_iterations=8,
perturbation_factor=0.076,
tournament_selection_n=10,
tournament_selection_p=0.86,
procs=cpu_count(),
multithreading=None,
cluster_manager=None,
batching=False,
batch_size=50,
fast_cycle=False,
turbo=False,
precision=32,
enable_autodiff=False,
binary_operators: Optional[List[str]] = None,
unary_operators: Optional[List[str]] = None,
niterations: int = 40,
populations: int = 15,
population_size: int = 33,
max_evals: Optional[int] = None,
maxsize: int = 20,
maxdepth: Optional[int] = None,
warmup_maxsize_by: Optional[float] = None,
timeout_in_seconds: Optional[float] = None,
constraints: Optional[Dict[str, Union[int, Tuple[int, int]]]] = None,
nested_constraints: Optional[Dict[str, Dict[str, int]]] = None,
loss: Optional[str] = None,
full_objective: Optional[str] = None,
complexity_of_operators: Optional[Dict[str, Union[int, float]]] = None,
complexity_of_constants: Union[int, float] = 1,
complexity_of_variables: Union[int, float] = 1,
parsimony: float = 0.0032,
dimensional_constraint_penalty: Optional[float] = None,
use_frequency: bool = True,
use_frequency_in_tournament: bool = True,
adaptive_parsimony_scaling: float = 20.0,
alpha: float = 0.1,
annealing: bool = False,
early_stop_condition: Optional[Union[float, str]] = None,
ncyclesperiteration: int = 550,
fraction_replaced: float = 0.000364,
fraction_replaced_hof: float = 0.035,
weight_add_node: float = 0.79,
weight_insert_node: float = 5.1,
weight_delete_node: float = 1.7,
weight_do_nothing: float = 0.21,
weight_mutate_constant: float = 0.048,
weight_mutate_operator: float = 0.47,
weight_randomize: float = 0.00023,
weight_simplify: float = 0.0020,
weight_optimize: float = 0.0,
crossover_probability: float = 0.066,
skip_mutation_failures: bool = True,
migration: bool = True,
hof_migration: bool = True,
topn: int = 12,
should_simplify: Optional[bool] = None,
should_optimize_constants: bool = True,
optimizer_algorithm: Literal["BFGS", "NelderMead"] = "BFGS",
optimizer_nrestarts: int = 2,
optimize_probability: float = 0.14,
optimizer_iterations: int = 8,
perturbation_factor: float = 0.076,
tournament_selection_n: int = 10,
tournament_selection_p: float = 0.86,
procs: int = cpu_count(),
multithreading: Optional[bool] = None,
cluster_manager: Optional[
Literal["slurm", "pbs", "lsf", "sge", "qrsh", "scyld", "htc"]
] = None,
heap_size_hint_in_bytes: Optional[int] = None,
batching: bool = False,
batch_size: int = 50,
fast_cycle: bool = False,
turbo: bool = False,
precision: int = 32,
enable_autodiff: bool = False,
random_state=None,
deterministic=False,
warm_start=False,
verbosity=1,
update_verbosity=None,
print_precision=5,
progress=True,
equation_file=None,
temp_equation_file=False,
tempdir=None,
delete_tempfiles=True,
julia_project=None,
update=False,
output_jax_format=False,
output_torch_format=False,
extra_sympy_mappings=None,
extra_torch_mappings=None,
extra_jax_mappings=None,
denoise=False,
select_k_features=None,
julia_kwargs=None,
deterministic: bool = False,
warm_start: bool = False,
verbosity: int = 1,
update_verbosity: Optional[int] = None,
print_precision: int = 5,
progress: bool = True,
equation_file: Optional[str] = None,
temp_equation_file: bool = False,
tempdir: Optional[str] = None,
delete_tempfiles: bool = True,
julia_project: Optional[str] = None,
update: bool = False,
output_jax_format: bool = False,
output_torch_format: bool = False,
extra_sympy_mappings: Optional[Dict[str, Callable]] = None,
extra_torch_mappings: Optional[Dict[Callable, Callable]] = None,
extra_jax_mappings: Optional[Dict[Callable, str]] = None,
denoise: bool = False,
select_k_features: Optional[int] = None,
julia_kwargs: Optional[Dict] = None,
**kwargs,
):
# Hyperparameters
Expand Down Expand Up @@ -800,10 +809,11 @@ def __init__(
# -- Selection parameters
self.tournament_selection_n = tournament_selection_n
self.tournament_selection_p = tournament_selection_p
# Solver parameters
# -- Performance parameters
self.procs = procs
self.multithreading = multithreading
self.cluster_manager = cluster_manager
self.heap_size_hint_in_bytes = heap_size_hint_in_bytes
self.batching = batching
self.batch_size = batch_size
self.fast_cycle = fast_cycle
Expand Down Expand Up @@ -1637,7 +1647,9 @@ def _run(self, X, y, mutated_params, weights, seed):
fraction_replaced_hof=self.fraction_replaced_hof,
should_simplify=self.should_simplify,
should_optimize_constants=self.should_optimize_constants,
warmup_maxsize_by=self.warmup_maxsize_by,
warmup_maxsize_by=0.0
if self.warmup_maxsize_by is None
else self.warmup_maxsize_by,
use_frequency=self.use_frequency,
use_frequency_in_tournament=self.use_frequency_in_tournament,
adaptive_parsimony_scaling=self.adaptive_parsimony_scaling,
Expand Down Expand Up @@ -1720,6 +1732,7 @@ def _run(self, X, y, mutated_params, weights, seed):
saved_state=self.raw_julia_state_,
return_state=True,
addprocs_function=cluster_manager,
heap_size_hint_in_bytes=self.heap_size_hint_in_bytes,
progress=progress and self.verbosity > 0 and len(y.shape) == 1,
verbosity=int(self.verbosity),
)
Expand Down
4 changes: 2 additions & 2 deletions pysr/version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__version__ = "0.16.5"
__symbolic_regression_jl_version__ = "0.22.5"
__version__ = "0.16.6"
__symbolic_regression_jl_version__ = "0.23.0"
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,5 @@
"Programming Language :: Python :: 3",
"Operating System :: OS Independent",
],
python_requires=">=3.7",
python_requires=">=3.8",
)

0 comments on commit 5bf2e55

Please sign in to comment.