diff --git a/llm_toolkit/models.py b/llm_toolkit/models.py
index 5a81c28dd0..9d9a57f67b 100644
--- a/llm_toolkit/models.py
+++ b/llm_toolkit/models.py
@@ -47,6 +47,7 @@
 MAX_TOKENS: int = 2000
 NUM_SAMPLES: int = 1
 TEMPERATURE: float = 0.4
+TEMPERATURE_RANGE: list[float] = [0.0, 2.0]
 
 
 class LLM:
@@ -61,6 +62,8 @@ class LLM:
 
   _max_attempts = 5  # Maximum number of attempts to get prediction response
 
+  temperature_range: list[float] = TEMPERATURE_RANGE
+
   def __init__(
       self,
       ai_binary: str,
@@ -116,16 +119,37 @@ def all_llm_subclasses(cls):
     yield cls
     for subcls in cls.__subclasses__():
       yield from subcls.all_llm_subclasses()
-    
+
   @classmethod
-  def all_llm_names(cls):
-    """Returns the current model name and all child model names."""
-    names = []
+  def all_llm_search(cls, attribute: str = None) -> list:
+    """Returns all models, or their |attribute| values when one is given.
+
+    Models that do not define |attribute| are skipped so the result never
+    mixes classes with attribute values.
+    """
+    out = []
     for subcls in cls.all_llm_subclasses():
       if hasattr(subcls, 'name') and subcls.name != AIBinaryModel.name:
-        names.append(subcls.name)
-    return names
+        if attribute is None:
+          out.append(subcls)
+        elif hasattr(subcls, attribute):
+          out.append(getattr(subcls, attribute))
+    return out
 
+  @classmethod
+  def all_llm_names(cls) -> list[str]:
+    """Returns the current model name and all child model names."""
+    return cls.all_llm_search('name')
+
+  @classmethod
+  def all_llm_temperature_ranges(cls) -> dict[str, list[float]]:
+    """Returns the temperature range of every model, keyed by model name."""
+    return {
+        m.name: m.temperature_range
+        for m in cls.all_llm_search()
+        if hasattr(m, 'temperature_range')
+    }
+
   @abstractmethod
   def estimate_token_num(self, text) -> int:
     """Estimates the number of tokens in |text|."""
@@ -384,6 +408,7 @@ class Claude(LLM):
   _max_output_tokens = 4096
   _vertex_ai_model = ''
   context_window = 200000
+  temperature_range = [0.0, 1.0]
 
   # ================================ Prompt ================================ #
   def estimate_token_num(self, text) -> int:
@@ -548,6 +573,7 @@ class VertexAIModel(GoogleModel):
 
   _vertex_ai_model = ''
   _max_output_tokens = 2048
+  temperature_range = [0.0, 1.0]
 
   def cloud_setup(self):
     """Sets Vertex AI cloud location."""
@@ -603,6 +629,8 @@ def ask_llm(self, prompt: prompts.Prompt) -> str:
 class GeminiModel(VertexAIModel):
   """Gemini models."""
 
+  temperature_range = [0.0, 2.0]
+
   safety_config = [
       generative_models.SafetySetting(
           category=generative_models.HarmCategory.
@@ -660,6 +688,7 @@ class GeminiPro(GeminiModel):
 
   name = 'vertex_ai_gemini-pro'
   _vertex_ai_model = 'gemini-1.0-pro'
+  temperature_range = [0.0, 1.0]
 
 
 class GeminiUltra(GeminiModel):
@@ -670,6 +699,7 @@ class GeminiUltra(GeminiModel):
 
   name = 'vertex_ai_gemini-ultra'
   _vertex_ai_model = 'gemini-ultra'
+  temperature_range = [0.0, 1.0]
 
 
 class GeminiExperimental(GeminiModel):
diff --git a/run_all_experiments.py b/run_all_experiments.py
index 24bb0aff55..acd70ee182 100755
--- a/run_all_experiments.py
+++ b/run_all_experiments.py
@@ -278,7 +278,13 @@ def parse_args() -> argparse.Namespace:
   assert args.num_samples > 0, '--num-samples must take a positive integer.'
 
   if args.temperature:
-    assert 2 >= args.temperature >= 0, '--temperature must be within 0 and 2.'
+    # Validate against the selected model's range; model names missing from
+    # the mapping fall back to the generic default range.
+    ranges = models.LLM.all_llm_temperature_ranges()
+    low, high = ranges.get(args.model, models.TEMPERATURE_RANGE)
+    assert high >= args.temperature >= low, (
+        f'--temperature must be within {low} and {high} '
+        f'for model {args.model}.')
 
   benchmark_yaml = args.benchmark_yaml
   if benchmark_yaml: