Skip to content
4 changes: 2 additions & 2 deletions trl/experimental/bco/bco_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ class BCOTrainer(_BaseTrainer):
The optimizer and scheduler to use for training.
preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`):
The function to use to preprocess the logits before computing the metrics.
peft_config (`dict`, defaults to `None`):
peft_config ([`~peft.PeftConfig`], *optional*):
The PEFT configuration to use for training. If you pass a PEFT configuration, the model will be wrapped in
a PEFT model.
compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
Expand Down Expand Up @@ -421,7 +421,7 @@ def __init__(
callbacks: list[TrainerCallback] | None = None,
optimizers: tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
preprocess_logits_for_metrics: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] | None = None,
peft_config: dict | None = None,
peft_config: "PeftConfig | None" = None,
Comment thread
cursor[bot] marked this conversation as resolved.
compute_metrics: Callable[[EvalLoopOutput], dict] | None = None,
model_adapter_name: str | None = None,
ref_adapter_name: str | None = None,
Expand Down
4 changes: 2 additions & 2 deletions trl/experimental/cpo/cpo_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ class CPOTrainer(_BaseTrainer):
The optimizer and scheduler to use for training.
preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`):
The function to use to preprocess the logits before computing the metrics.
peft_config (`dict`, defaults to `None`):
peft_config ([`~peft.PeftConfig`], *optional*):
The PEFT configuration to use for training. If you pass a PEFT configuration, the model will be wrapped in
a PEFT model.
compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
Expand Down Expand Up @@ -142,7 +142,7 @@ def __init__(
callbacks: list[TrainerCallback] | None = None,
optimizers: tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
preprocess_logits_for_metrics: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] | None = None,
peft_config: dict | None = None,
peft_config: "PeftConfig | None" = None,
compute_metrics: Callable[[EvalLoopOutput], dict] | None = None,
):
if train_dataset is None:
Expand Down
6 changes: 3 additions & 3 deletions trl/experimental/nash_md/nash_md_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@


if is_peft_available():
from peft import PeftModel
from peft import PeftConfig, PeftModel


class GeometricMixtureWrapper(GenerationMixin):
Expand Down Expand Up @@ -133,7 +133,7 @@ class NashMDTrainer(OnlineDPOTrainer):
Processing class used to process the data. If provided, will be used to automatically process the inputs
for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
reuse the fine-tuned model.
peft_config (`dict`):
peft_config ([`~peft.PeftConfig`], *optional*):
The PEFT configuration to use for training.
compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
The function to use to compute the metrics. Must take an `EvalPrediction` and return a dictionary string to
Expand Down Expand Up @@ -177,7 +177,7 @@ def __init__(
| FeatureExtractionMixin
| ProcessorMixin
| None = None,
peft_config: dict | None = None,
peft_config: "PeftConfig | None" = None,
compute_metrics: Callable[[EvalPrediction], dict] | None = None,
callbacks: list[TrainerCallback] | None = None,
optimizers: tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
Expand Down
4 changes: 2 additions & 2 deletions trl/experimental/orpo/orpo_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ class ORPOTrainer(_BaseTrainer):
The optimizer and scheduler to use for training.
preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`):
The function to use to preprocess the logits before computing the metrics.
peft_config (`dict`, defaults to `None`):
peft_config ([`~peft.PeftConfig`], *optional*):
The PEFT configuration to use for training. If you pass a PEFT configuration, the model will be wrapped in
a PEFT model.
compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
Expand Down Expand Up @@ -151,7 +151,7 @@ def __init__(
callbacks: list[TrainerCallback] | None = None,
optimizers: tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
preprocess_logits_for_metrics: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] | None = None,
peft_config: dict | None = None,
peft_config: "PeftConfig | None" = None,
compute_metrics: Callable[[EvalLoopOutput], dict] | None = None,
):
if train_dataset is None:
Expand Down
4 changes: 2 additions & 2 deletions trl/experimental/prm/prm_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ class PRMTrainer(_BaseTrainer):
The optimizer and scheduler to use for training.
preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`):
The function to use to preprocess the logits before computing the metrics.
peft_config (`dict`, defaults to `None`):
peft_config ([`~peft.PeftConfig`], *optional*):
The PEFT configuration to use for training. If you pass a PEFT configuration, the model will be wrapped in
a PEFT model.
"""
Expand Down Expand Up @@ -167,7 +167,7 @@ def __init__(
None,
),
preprocess_logits_for_metrics: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] | None = None,
peft_config: dict | None = None,
peft_config: "PeftConfig | None" = None,
):
if train_dataset is None:
raise ValueError("`train_dataset` is required")
Expand Down
6 changes: 3 additions & 3 deletions trl/experimental/xpo/xpo_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@


if is_peft_available():
from peft import PeftModel
from peft import PeftConfig, PeftModel


class XPOTrainer(OnlineDPOTrainer):
Expand Down Expand Up @@ -74,7 +74,7 @@ class XPOTrainer(OnlineDPOTrainer):
Processing class used to process the data. If provided, will be used to automatically process the inputs
for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
reuse the fine-tuned model.
peft_config (`dict`):
peft_config ([`~peft.PeftConfig`], *optional*):
The PEFT configuration to use for training.
compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
The function to use to compute the metrics. Must take an `EvalPrediction` and return a dictionary string to
Expand Down Expand Up @@ -117,7 +117,7 @@ def __init__(
| ProcessorMixin
| None = None,
reward_processing_classes: PreTrainedTokenizerBase | list[PreTrainedTokenizerBase] | None = None,
peft_config: dict | None = None,
peft_config: "PeftConfig | None" = None,
compute_metrics: Callable[[EvalPrediction], dict] | None = None,
callbacks: list[TrainerCallback] | None = None,
optimizers: tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
Expand Down
Loading